From 14a8aa4964e01aafcd684499315d3d4ea1de428f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 30 Jan 2020 21:45:17 -0800 Subject: tools/power/x86/intel-speed-select: Fix display for turbo-freq auto mode When mailbox command for the turbo-freq enable fails, then don't display result for auto-mode. When turbo-freq enable fails, there is no point to set CPU priorities. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 2b2b8167c65b..0cf3548681f8 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -42,6 +42,7 @@ static int out_format_json; static int cmd_help; static int force_online_offline; static int auto_mode; +static int fact_enable_fail; /* clos related */ static int current_clos = -1; @@ -1527,6 +1528,8 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, disp_results: if (status) { isst_display_result(cpu, outf, "turbo-freq", "enable", ret); + if (ret) + fact_enable_fail = ret; } else { /* Since we modified TRL during Fact enable, restore it */ isst_set_trl_from_current_tdp(cpu, fact_trl); @@ -1568,7 +1571,7 @@ static void set_fact_enable(int arg) NULL, &enable); isst_ctdp_display_information_end(outf); - if (enable && auto_mode) { + if (!fact_enable_fail && enable && auto_mode) { /* * When we adjust CLOS param, we have to set for siblings also. * So for the each user specified CPU, also add the sibling -- cgit v1.2.3 From 3b0fe3bab31fa7b320667e3d001eaa3f23590c05 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 30 Jan 2020 21:45:18 -0800 Subject: tools/power/x86/intel-speed-select: Avoid duplicate names for json parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the command "intel-speed-select perf-profile info": There are two instances of “speed-select-turbo-freq” underneath “perf-profile-level-0” for each package. When we load the output into python with json.load(), the second instance overwrites the first. Result is that we can only access: "speed-select-turbo-freq": { "bucket-0": { "high-priority-cores-count": "2", "high-priority-max-frequency(MHz)": "3000", "high-priority-max-avx2-frequency(MHz)": "2800", "high-priority-max-avx512-frequency(MHz)": "2600" }, Because it is a duplicate of "speed-select-turbo-freq": "disabled" Same is true for "speed-select-base-freq". To avoid this add "-properties" suffix for the second instance to differentiate. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 4fb0c1d49d64..3d70be2a9f61 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -178,7 +178,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level, char header[256]; char value[256]; - snprintf(header, sizeof(header), "speed-select-base-freq"); + snprintf(header, sizeof(header), "speed-select-base-freq-properties"); format_and_print(outf, disp_level, header, NULL); snprintf(header, sizeof(header), "high-priority-base-frequency(MHz)"); @@ -224,7 +224,7 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level, char value[256]; int j; - snprintf(header, sizeof(header), "speed-select-turbo-freq"); + snprintf(header, sizeof(header), "speed-select-turbo-freq-properties"); format_and_print(outf, base_level, header, NULL); for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { if (fact_bucket != 0xff && fact_bucket != j) -- cgit v1.2.3 From 901cff7cb96140a658a848a568b606ba764239bc Mon Sep 17 00:00:00 2001 From: Topi Miettinen Date: Thu, 23 Jan 2020 14:58:38 +0200 Subject: firmware_loader: load files from the mount namespace of init I have an experimental setup where almost every possible system service (even early startup ones) runs in separate namespace, using a dedicated, minimal file system. In process of minimizing the contents of the file systems with regards to modules and firmware files, I noticed that in my system, the firmware files are loaded from three different mount namespaces, those of systemd-udevd, init and systemd-networkd. The logic of the source namespace is not very clear, it seems to depend on the driver, but the namespace of the current process is used. So, this patch tries to make things a bit clearer and changes the loading of firmware files only from the mount namespace of init. This may also improve security, though I think that using firmware files as attack vector could be too impractical anyway. Later, it might make sense to make the mount namespace configurable, for example with a new file in /proc/sys/kernel/firmware_config/. That would allow a dedicated file system only for firmware files and those need not be present anywhere else. This configurability would make more sense if made also for kernel modules and /sbin/modprobe. Modules are already loaded from init namespace (usermodehelper uses kthreadd namespace) except when directly loaded by systemd-udevd. Instead of using the mount namespace of the current process to load firmware files, use the mount namespace of init process. Link: https://lore.kernel.org/lkml/bb46ebae-4746-90d9-ec5b-fce4c9328c86@gmail.com/ Link: https://lore.kernel.org/lkml/0e3f7653-c59d-9341-9db2-c88f5b988c68@gmail.com/ Signed-off-by: Topi Miettinen Link: https://lore.kernel.org/r/20200123125839.37168-1-toiwoton@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 6 +- fs/exec.c | 26 ++++ include/linux/fs.h | 2 + tools/testing/selftests/firmware/Makefile | 9 +- tools/testing/selftests/firmware/fw_namespace.c | 151 +++++++++++++++++++++++ tools/testing/selftests/firmware/fw_run_tests.sh | 4 + 6 files changed, 190 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/firmware/fw_namespace.c (limited to 'tools') diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 57133a9dad09..59d1dc322080 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -493,8 +493,10 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, } fw_priv->size = 0; - rc = kernel_read_file_from_path(path, &buffer, &size, - msize, id); + + /* load firmware files from the mount namespace of init */ + rc = kernel_read_file_from_path_initns(path, &buffer, + &size, msize, id); if (rc) { if (rc != -ENOENT) dev_warn(device, "loading %s failed with error %d\n", diff --git a/fs/exec.c b/fs/exec.c index db17be51b112..688c824cdac8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -985,6 +985,32 @@ int kernel_read_file_from_path(const char *path, void **buf, loff_t *size, } EXPORT_SYMBOL_GPL(kernel_read_file_from_path); +int kernel_read_file_from_path_initns(const char *path, void **buf, + loff_t *size, loff_t max_size, + enum kernel_read_file_id id) +{ + struct file *file; + struct path root; + int ret; + + if (!path || !*path) + return -EINVAL; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + + file = file_open_root(root.dentry, root.mnt, path, O_RDONLY, 0); + path_put(&root); + if (IS_ERR(file)) + return PTR_ERR(file); + + ret = kernel_read_file(file, buf, size, max_size, id); + fput(file); + return ret; +} +EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns); + int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size, enum kernel_read_file_id id) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..f814ccd8d929 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3012,6 +3012,8 @@ extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t, enum kernel_read_file_id); +extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, loff_t, + enum kernel_read_file_id); extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 012b2cf69c11..40211cd8f0e6 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -1,13 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only # Makefile for firmware loading selftests - -# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" -all: +CFLAGS = -Wall \ + -O2 TEST_PROGS := fw_run_tests.sh TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh +TEST_GEN_FILES := fw_namespace include ../lib.mk - -# Nothing to clean up. -clean: diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c new file mode 100644 index 000000000000..5ebc1aec7923 --- /dev/null +++ b/tools/testing/selftests/firmware/fw_namespace.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Test triggering of loading of firmware from different mount + * namespaces. Expect firmware to be always loaded from the mount + * namespace of PID 1. */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 +#endif + +static char *fw_path = NULL; + +static void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + if (fw_path) + unlink(fw_path); + umount("/lib/firmware"); + exit(EXIT_FAILURE); +} + +static void trigger_fw(const char *fw_name, const char *sys_path) +{ + int fd; + + fd = open(sys_path, O_WRONLY); + if (fd < 0) + die("open failed: %s\n", + strerror(errno)); + if (write(fd, fw_name, strlen(fw_name)) != strlen(fw_name)) + exit(EXIT_FAILURE); + close(fd); +} + +static void setup_fw(const char *fw_path) +{ + int fd; + const char fw[] = "ABCD0123"; + + fd = open(fw_path, O_WRONLY | O_CREAT, 0600); + if (fd < 0) + die("open failed: %s\n", + strerror(errno)); + if (write(fd, fw, sizeof(fw) -1) != sizeof(fw) -1) + die("write failed: %s\n", + strerror(errno)); + close(fd); +} + +static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_fw_in_parent_ns) +{ + pid_t child; + + if (block_fw_in_parent_ns) + if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1) + die("blocking firmware in parent ns failed\n"); + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + if (block_fw_in_parent_ns) + umount("/lib/firmware"); + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) == -1) + die("remount root in child ns failed\n"); + + if (!block_fw_in_parent_ns) { + if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1) + die("blocking firmware in child ns failed\n"); + } else + umount("/lib/firmware"); + + trigger_fw(fw_name, sys_path); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + const char *fw_name = "test-firmware.bin"; + char *sys_path; + if (argc != 2) + die("usage: %s sys_path\n", argv[0]); + + /* Mount tmpfs to /lib/firmware so we don't have to assume + that it is writable for us.*/ + if (mount("test", "/lib/firmware", "tmpfs", 0, NULL) == -1) + die("mounting tmpfs to /lib/firmware failed\n"); + + sys_path = argv[1]; + asprintf(&fw_path, "/lib/firmware/%s", fw_name); + + setup_fw(fw_path); + + setvbuf(stdout, NULL, _IONBF, 0); + /* Positive case: firmware in PID1 mount namespace */ + printf("Testing with firmware in parent namespace (assumed to be same file system as PID1)\n"); + if (!test_fw_in_ns(fw_name, sys_path, false)) + die("error: failed to access firmware\n"); + + /* Negative case: firmware in child mount namespace, expected to fail */ + printf("Testing with firmware in child namespace\n"); + if (test_fw_in_ns(fw_name, sys_path, true)) + die("error: firmware access did not fail\n"); + + unlink(fw_path); + free(fw_path); + umount("/lib/firmware"); + exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 8e14d555c197..777377078d5e 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -61,6 +61,10 @@ run_test_config_0003() check_mods check_setup +echo "Running namespace test: " +$TEST_DIR/fw_namespace $DIR/trigger_request +echo "OK" + if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0001 run_test_config_0002 -- cgit v1.2.3 From 034c7678dd2c05fb08e6fa3b0ac527ead8f1b11b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Jan 2020 13:32:34 -0800 Subject: selftests/resctrl: Add README for resctrl tests resctrl tests will be implemented. README is added for the tool first. Co-developed-by: Babu Moger Signed-off-by: Babu Moger Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/README | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tools/testing/selftests/resctrl/README (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README new file mode 100644 index 000000000000..6e5a0ffa18e8 --- /dev/null +++ b/tools/testing/selftests/resctrl/README @@ -0,0 +1,53 @@ +resctrl_tests - resctrl file system test suit + +Authors: + Fenghua Yu + Sai Praneeth Prakhya , + +resctrl_tests tests various resctrl functionalities and interfaces including +both software and hardware. + +Currently it supports Memory Bandwidth Monitoring test and Memory Bandwidth +Allocation test on Intel RDT hardware. More tests will be added in the future. +And the test suit can be extended to cover AMD QoS and ARM MPAM hardware +as well. + +BUILD +----- + +Run "make" to build executable file "resctrl_tests". + +RUN +--- + +To use resctrl_tests, root or sudoer privileges are required. This is because +the test needs to mount resctrl file system and change contents in the file +system. + +Executing the test without any parameter will run all supported tests: + + sudo ./resctrl_tests + +OVERVIEW OF EXECUTION +--------------------- + +A test case has four stages: + + - setup: mount resctrl file system, create group, setup schemata, move test + process pids to tasks, start benchmark. + - execute: let benchmark run + - verify: get resctrl data and verify the data with another source, e.g. + perf event. + - teardown: umount resctrl and clear temporary files. + +ARGUMENTS +--------- + +Parameter '-h' shows usage information. + +usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits] + -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf + -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat + -n no_of_bits: run cache tests using specified no of bits in cache bit mask + -p cpu_no: specify CPU number to run the test. 1 is default + -h: help -- cgit v1.2.3 From 591a6e8588fc1dbdc04355e8ad7b0be43d221c9c Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 16 Jan 2020 13:32:35 -0800 Subject: selftests/resctrl: Add basic resctrl file system operations and data The basic resctrl file system operations and data are added for future usage by resctrl selftest tool. Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/Makefile | 12 + tools/testing/selftests/resctrl/resctrl.h | 50 +++ tools/testing/selftests/resctrl/resctrlfs.c | 462 ++++++++++++++++++++++++++++ 3 files changed, 524 insertions(+) create mode 100644 tools/testing/selftests/resctrl/Makefile create mode 100644 tools/testing/selftests/resctrl/resctrl.h create mode 100644 tools/testing/selftests/resctrl/resctrlfs.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile new file mode 100644 index 000000000000..76bbd6d3e4a8 --- /dev/null +++ b/tools/testing/selftests/resctrl/Makefile @@ -0,0 +1,12 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS = -g -Wall +SRCS=$(wildcard *.c) +OBJS=$(SRCS:.c=.o) + +$(OBJS): $(SRCS) + $(CC) $(CFLAGS) -c $(SRCS) + +.PHONY: clean + +clean: + $(RM) $(OBJS) diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h new file mode 100644 index 000000000000..ba98cd6efc64 --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE +#ifndef RESCTRL_H +#define RESCTRL_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RESCTRL_PATH "/sys/fs/resctrl" +#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" + +#define PARENT_EXIT(err_msg) \ + do { \ + perror(err_msg); \ + kill(ppid, SIGKILL); \ + exit(EXIT_FAILURE); \ + } while (0) + +pid_t bm_pid, ppid; + +int remount_resctrlfs(bool mum_resctrlfs); +int get_resource_id(int cpu_no, int *resource_id); +int validate_bw_report_request(char *bw_report); +bool validate_resctrl_feature_request(char *resctrl_val); +char *fgrep(FILE *inf, const char *str); +int taskset_benchmark(pid_t bm_pid, int cpu_no); +void run_benchmark(int signum, siginfo_t *info, void *ucontext); +int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, + char *resctrl_val); +int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, + char *resctrl_val); +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags); +int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush, + int op, char *resctrl_va); + +#endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c new file mode 100644 index 000000000000..1b125f9d8e5d --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic resctrl file system operations + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" + +int tests_run; + +static int find_resctrl_mount(char *buffer) +{ + FILE *mounts; + char line[256], *fs, *mntpoint; + + mounts = fopen("/proc/mounts", "r"); + if (!mounts) { + perror("/proc/mounts"); + return -ENXIO; + } + while (!feof(mounts)) { + if (!fgets(line, 256, mounts)) + break; + fs = strtok(line, " \t"); + if (!fs) + continue; + mntpoint = strtok(NULL, " \t"); + if (!mntpoint) + continue; + fs = strtok(NULL, " \t"); + if (!fs) + continue; + if (strcmp(fs, "resctrl")) + continue; + + fclose(mounts); + if (buffer) + strncpy(buffer, mntpoint, 256); + + return 0; + } + + fclose(mounts); + + return -ENOENT; +} + +/* + * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl + * @mum_resctrlfs: Should the resctrl FS be remounted? + * + * If not mounted, mount it. + * If mounted and mum_resctrlfs then remount resctrl FS. + * If mounted and !mum_resctrlfs then noop + * + * Return: 0 on success, non-zero on failure + */ +int remount_resctrlfs(bool mum_resctrlfs) +{ + char mountpoint[256]; + int ret; + + ret = find_resctrl_mount(mountpoint); + if (ret) + strcpy(mountpoint, RESCTRL_PATH); + + if (!ret && mum_resctrlfs && umount(mountpoint)) { + printf("not ok unmounting \"%s\"\n", mountpoint); + perror("# umount"); + tests_run++; + } + + if (!ret && !mum_resctrlfs) + return 0; + + ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL); + printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "", + RESCTRL_PATH); + if (ret) + perror("# mount"); + + tests_run++; + + return ret; +} + +int umount_resctrlfs(void) +{ + if (umount(RESCTRL_PATH)) { + perror("# Unable to umount resctrl"); + + return errno; + } + + return 0; +} + +/* + * get_resource_id - Get socket number/l3 id for a specified CPU + * @cpu_no: CPU number + * @resource_id: Socket number or l3_id + * + * Return: >= 0 on success, < 0 on failure. + */ +int get_resource_id(int cpu_no, int *resource_id) +{ + char phys_pkg_path[1024]; + FILE *fp; + + sprintf(phys_pkg_path, "%s%d/topology/physical_package_id", + PHYS_ID_PATH, cpu_no); + fp = fopen(phys_pkg_path, "r"); + if (!fp) { + perror("Failed to open physical_package_id"); + + return -1; + } + if (fscanf(fp, "%d", resource_id) <= 0) { + perror("Could not get socket number or l3 id"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu + * @bm_pid: PID that should be binded + * @cpu_no: CPU number at which the PID would be binded + * + * Return: 0 on success, non-zero on failure + */ +int taskset_benchmark(pid_t bm_pid, int cpu_no) +{ + cpu_set_t my_set; + + CPU_ZERO(&my_set); + CPU_SET(cpu_no, &my_set); + + if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) { + perror("Unable to taskset benchmark"); + + return -1; + } + + return 0; +} + +/* + * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) + * in specified signal. Direct benchmark stdio to /dev/null. + * @signum: signal number + * @info: signal info + * @ucontext: user context in signal handling + * + * Return: void + */ +void run_benchmark(int signum, siginfo_t *info, void *ucontext) +{ + unsigned long span; + int operation, ret; + char **benchmark_cmd; + FILE *fp; + + benchmark_cmd = info->si_ptr; + + /* + * Direct stdio of child to /dev/null, so that only parent writes to + * stdio (console) + */ + fp = freopen("/dev/null", "w", stdout); + if (!fp) + PARENT_EXIT("Unable to direct benchmark status to /dev/null"); + + if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { + /* Execute default fill_buf benchmark */ + span = strtoul(benchmark_cmd[1], NULL, 10); + operation = atoi(benchmark_cmd[4]); + if (run_fill_buf(span, 1, 1, operation, NULL)) + fprintf(stderr, "Error in running fill buffer\n"); + } else { + /* Execute specified benchmark */ + ret = execvp(benchmark_cmd[0], benchmark_cmd); + if (ret) + perror("wrong\n"); + } + + fclose(stdout); + PARENT_EXIT("Unable to run specified benchmark"); +} + +/* + * create_grp - Create a group only if one doesn't exist + * @grp_name: Name of the group + * @grp: Full path and name of the group + * @parent_grp: Full path and name of the parent group + * + * Return: 0 on success, non-zero on failure + */ +static int create_grp(const char *grp_name, char *grp, const char *parent_grp) +{ + int found_grp = 0; + struct dirent *ep; + DIR *dp; + + /* Check if requested grp exists or not */ + dp = opendir(parent_grp); + if (dp) { + while ((ep = readdir(dp)) != NULL) { + if (strcmp(ep->d_name, grp_name) == 0) + found_grp = 1; + } + closedir(dp); + } else { + perror("Unable to open resctrl for group"); + + return -1; + } + + /* Requested grp doesn't exist, hence create it */ + if (found_grp == 0) { + if (mkdir(grp, 0) == -1) { + perror("Unable to create group"); + + return -1; + } + } + + return 0; +} + +static int write_pid_to_tasks(char *tasks, pid_t pid) +{ + FILE *fp; + + fp = fopen(tasks, "w"); + if (!fp) { + perror("Failed to open tasks file"); + + return -1; + } + if (fprintf(fp, "%d\n", pid) < 0) { + perror("Failed to wr pid to tasks file"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS + * @bm_pid: PID that should be written + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * + * If a con_mon grp is requested, create it and write pid to it, otherwise + * write pid to root con_mon grp. + * If a mon grp is requested, create it and write pid to it, otherwise + * pid is not written, this means that pid is in con_mon grp and hence + * should consult con_mon grp's mon_data directory for results. + * + * Return: 0 on success, non-zero on failure + */ +int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, + char *resctrl_val) +{ + char controlgroup[128], monitorgroup[512], monitorgroup_p[256]; + char tasks[1024]; + int ret = 0; + + if (strlen(ctrlgrp)) + sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp); + else + sprintf(controlgroup, "%s", RESCTRL_PATH); + + /* Create control and monitoring group and write pid into it */ + ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH); + if (ret) + goto out; + sprintf(tasks, "%s/tasks", controlgroup); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; + + /* Create mon grp and write pid into it for "mbm" test */ + if ((strcmp(resctrl_val, "mbm") == 0)) { + if (mongrp) { + sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); + sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); + ret = create_grp(mongrp, monitorgroup, monitorgroup_p); + if (ret) + goto out; + + sprintf(tasks, "%s/mon_groups/%s/tasks", + controlgroup, mongrp); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; + } + } + +out: + printf("%sok writing benchmark parameters to resctrl FS\n", + ret ? "not " : ""); + if (ret) + perror("# writing to resctrlfs"); + + tests_run++; + + return ret; +} + +/* + * write_schemata - Update schemata of a con_mon grp + * @ctrlgrp: Name of the con_mon grp + * @schemata: Schemata that should be updated to + * @cpu_no: CPU number that the benchmark PID is binded to + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * + * Update schemata of a con_mon grp *only* if requested resctrl feature is + * allocation type + * + * Return: 0 on success, non-zero on failure + */ +int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) +{ + char controlgroup[1024], schema[1024], reason[64]; + int resource_id, ret = 0; + FILE *fp; + + if (strcmp(resctrl_val, "mba") != 0) + return -ENOENT; + + if (!schemata) { + printf("# Skipping empty schemata update\n"); + + return -1; + } + + if (get_resource_id(cpu_no, &resource_id) < 0) { + sprintf(reason, "Failed to get resource id"); + ret = -1; + + goto out; + } + + if (strlen(ctrlgrp) != 0) + sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp); + else + sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); + + sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); + + fp = fopen(controlgroup, "w"); + if (!fp) { + sprintf(reason, "Failed to open control group"); + ret = -1; + + goto out; + } + + if (fprintf(fp, "%s\n", schema) < 0) { + sprintf(reason, "Failed to write schemata in control group"); + fclose(fp); + ret = -1; + + goto out; + } + fclose(fp); + +out: + printf("%sok Write schema \"%s\" to resctrl FS%s%s\n", + ret ? "not " : "", schema, ret ? " # " : "", + ret ? reason : ""); + tests_run++; + + return ret; +} + +char *fgrep(FILE *inf, const char *str) +{ + char line[256]; + int slen = strlen(str); + + while (!feof(inf)) { + if (!fgets(line, 256, inf)) + break; + if (strncmp(line, str, slen)) + continue; + + return strdup(line); + } + + return NULL; +} + +/* + * validate_resctrl_feature_request - Check if requested feature is valid. + * @resctrl_val: Requested feature + * + * Return: 0 on success, non-zero on failure + */ +bool validate_resctrl_feature_request(char *resctrl_val) +{ + FILE *inf = fopen("/proc/cpuinfo", "r"); + bool found = false; + char *res; + + if (!inf) + return false; + + res = fgrep(inf, "flags"); + + if (res) { + char *s = strchr(res, ':'); + + found = s && !strstr(s, resctrl_val); + free(res); + } + fclose(inf); + + return found; +} + +int validate_bw_report_request(char *bw_report) +{ + if (strcmp(bw_report, "reads") == 0) + return 0; + if (strcmp(bw_report, "writes") == 0) + return 0; + if (strcmp(bw_report, "nt-writes") == 0) { + strcpy(bw_report, "writes"); + return 0; + } + if (strcmp(bw_report, "total") == 0) + return 0; + + fprintf(stderr, "Requested iMC B/W report type unavailable\n"); + + return -1; +} + +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags) +{ + int ret; + + ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, + group_fd, flags); + return ret; +} -- cgit v1.2.3 From 1d3f08687d76c0a80be35bbab6d64b1a8f2730e8 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 16 Jan 2020 13:32:36 -0800 Subject: selftests/resctrl: Read memory bandwidth from perf IMC counter and from resctrl file system Total memory bandwidth can be monitored from perf IMC counter and from resctrl file system. Later the two will be compared to verify the total memory bandwidth read from resctrl is correct. Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/resctrl_val.c | 117 ++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tools/testing/selftests/resctrl/resctrl_val.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c new file mode 100644 index 000000000000..0ca4c9252516 --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory bandwidth monitoring and allocation library + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" + +#define UNCORE_IMC "uncore_imc" +#define READ_FILE_NAME "events/cas_count_read" +#define WRITE_FILE_NAME "events/cas_count_write" +#define DYN_PMU_PATH "/sys/bus/event_source/devices" +#define SCALE 0.00006103515625 +#define MAX_IMCS 20 +#define MAX_TOKENS 5 +#define READ 0 +#define WRITE 1 +#define CON_MON_MBM_LOCAL_BYTES_PATH \ + "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" + +#define CON_MBM_LOCAL_BYTES_PATH \ + "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" + +#define MON_MBM_LOCAL_BYTES_PATH \ + "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" + +#define MBM_LOCAL_BYTES_PATH \ + "%s/mon_data/mon_L3_%02d/mbm_local_bytes" + +struct membw_read_format { + __u64 value; /* The value of the event */ + __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + __u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ + __u64 id; /* if PERF_FORMAT_ID */ +}; + +struct imc_counter_config { + __u32 type; + __u64 event; + __u64 umask; + struct perf_event_attr pe; + struct membw_read_format return_value; + int fd; +}; + +static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; + +void membw_initialize_perf_event_attr(int i, int j) +{ + memset(&imc_counters_config[i][j].pe, 0, + sizeof(struct perf_event_attr)); + imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type; + imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr); + imc_counters_config[i][j].pe.disabled = 1; + imc_counters_config[i][j].pe.inherit = 1; + imc_counters_config[i][j].pe.exclude_guest = 0; + imc_counters_config[i][j].pe.config = + imc_counters_config[i][j].umask << 8 | + imc_counters_config[i][j].event; + imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER; + imc_counters_config[i][j].pe.read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; +} + +void membw_ioctl_perf_event_ioc_reset_enable(int i, int j) +{ + ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0); + ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0); +} + +void membw_ioctl_perf_event_ioc_disable(int i, int j) +{ + ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0); +} + +/* + * get_event_and_umask: Parse config into event and umask + * @cas_count_cfg: Config + * @count: iMC number + * @op: Operation (read/write) + */ +void get_event_and_umask(char *cas_count_cfg, int count, bool op) +{ + char *token[MAX_TOKENS]; + int i = 0; + + strcat(cas_count_cfg, ","); + token[0] = strtok(cas_count_cfg, "=,"); + + for (i = 1; i < MAX_TOKENS; i++) + token[i] = strtok(NULL, "=,"); + + for (i = 0; i < MAX_TOKENS; i++) { + if (!token[i]) + break; + if (strcmp(token[i], "event") == 0) { + if (op == READ) + imc_counters_config[count][READ].event = + strtol(token[i + 1], NULL, 16); + else + imc_counters_config[count][WRITE].event = + strtol(token[i + 1], NULL, 16); + } + if (strcmp(token[i], "umask") == 0) { + if (op == READ) + imc_counters_config[count][READ].umask = + strtol(token[i + 1], NULL, 16); + else + imc_counters_config[count][WRITE].umask = + strtol(token[i + 1], NULL, 16); + } + } +} -- cgit v1.2.3 From 7f4d257e3a2a4864268f6f0da7c85ca4f083e643 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 16 Jan 2020 13:32:37 -0800 Subject: selftests/resctrl: Add callback to start a benchmark The callback starts a child process and puts the child pid in created resctrl group with specified memory bandwidth in schemata. The child starts running benchmark. Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/resctrl.h | 29 ++ tools/testing/selftests/resctrl/resctrl_val.c | 574 ++++++++++++++++++++++++++ 2 files changed, 603 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index ba98cd6efc64..fb42087c904d 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -3,6 +3,8 @@ #ifndef RESCTRL_H #define RESCTRL_H #include +#include +#include #include #include #include @@ -19,6 +21,7 @@ #include #include +#define MB (1024 * 1024) #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" @@ -29,10 +32,35 @@ exit(EXIT_FAILURE); \ } while (0) +/* + * resctrl_val_param: resctrl test parameters + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * @cpu_no: CPU number to which the benchmark would be binded + * @span: Memory bytes accessed in each benchmark iteration + * @mum_resctrlfs: Should the resctrl FS be remounted? + * @filename: Name of file to which the o/p should be written + * @bw_report: Bandwidth report type (reads vs writes) + * @setup: Call back function to setup test environment + */ +struct resctrl_val_param { + char *resctrl_val; + char ctrlgrp[64]; + char mongrp[64]; + int cpu_no; + unsigned long span; + int mum_resctrlfs; + char filename[64]; + char *bw_report; + int (*setup)(int num, ...); +}; + pid_t bm_pid, ppid; int remount_resctrlfs(bool mum_resctrlfs); int get_resource_id(int cpu_no, int *resource_id); +int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); bool validate_resctrl_feature_request(char *resctrl_val); char *fgrep(FILE *inf, const char *str); @@ -46,5 +74,6 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush, int op, char *resctrl_va); +int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 0ca4c9252516..0d1bd03ec4be 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -47,6 +47,8 @@ struct imc_counter_config { int fd; }; +static char mbm_total_path[1024]; +static int imcs; static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; void membw_initialize_perf_event_attr(int i, int j) @@ -115,3 +117,575 @@ void get_event_and_umask(char *cas_count_cfg, int count, bool op) } } } + +static int open_perf_event(int i, int cpu_no, int j) +{ + imc_counters_config[i][j].fd = + perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1, + PERF_FLAG_FD_CLOEXEC); + + if (imc_counters_config[i][j].fd == -1) { + fprintf(stderr, "Error opening leader %llx\n", + imc_counters_config[i][j].pe.config); + + return -1; + } + + return 0; +} + +/* Get type and config (read and write) of an iMC counter */ +static int read_from_imc_dir(char *imc_dir, int count) +{ + char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; + FILE *fp; + + /* Get type of iMC counter */ + sprintf(imc_counter_type, "%s%s", imc_dir, "type"); + fp = fopen(imc_counter_type, "r"); + if (!fp) { + perror("Failed to open imc counter type file"); + + return -1; + } + if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) { + perror("Could not get imc type"); + fclose(fp); + + return -1; + } + fclose(fp); + + imc_counters_config[count][WRITE].type = + imc_counters_config[count][READ].type; + + /* Get read config */ + sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); + fp = fopen(imc_counter_cfg, "r"); + if (!fp) { + perror("Failed to open imc config file"); + + return -1; + } + if (fscanf(fp, "%s", cas_count_cfg) <= 0) { + perror("Could not get imc cas count read"); + fclose(fp); + + return -1; + } + fclose(fp); + + get_event_and_umask(cas_count_cfg, count, READ); + + /* Get write config */ + sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME); + fp = fopen(imc_counter_cfg, "r"); + if (!fp) { + perror("Failed to open imc config file"); + + return -1; + } + if (fscanf(fp, "%s", cas_count_cfg) <= 0) { + perror("Could not get imc cas count write"); + fclose(fp); + + return -1; + } + fclose(fp); + + get_event_and_umask(cas_count_cfg, count, WRITE); + + return 0; +} + +/* + * A system can have 'n' number of iMC (Integrated Memory Controller) + * counters, get that 'n'. For each iMC counter get it's type and config. + * Also, each counter has two configs, one for read and the other for write. + * A config again has two parts, event and umask. + * Enumerate all these details into an array of structures. + * + * Return: >= 0 on success. < 0 on failure. + */ +static int num_of_imcs(void) +{ + unsigned int count = 0; + char imc_dir[512]; + struct dirent *ep; + int ret; + DIR *dp; + + dp = opendir(DYN_PMU_PATH); + if (dp) { + while ((ep = readdir(dp))) { + if (strstr(ep->d_name, UNCORE_IMC)) { + sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, + ep->d_name); + ret = read_from_imc_dir(imc_dir, count); + if (ret) { + closedir(dp); + + return ret; + } + count++; + } + } + closedir(dp); + if (count == 0) { + perror("Unable find iMC counters!\n"); + + return -1; + } + } else { + perror("Unable to open PMU directory!\n"); + + return -1; + } + + return count; +} + +static int initialize_mem_bw_imc(void) +{ + int imc, j; + + imcs = num_of_imcs(); + if (imcs <= 0) + return imcs; + + /* Initialize perf_event_attr structures for all iMC's */ + for (imc = 0; imc < imcs; imc++) { + for (j = 0; j < 2; j++) + membw_initialize_perf_event_attr(imc, j); + } + + return 0; +} + +/* + * get_mem_bw_imc: Memory band width as reported by iMC counters + * @cpu_no: CPU number that the benchmark PID is binded to + * @bw_report: Bandwidth report type (reads, writes) + * + * Memory B/W utilized by a process on a socket can be calculated using + * iMC counters. Perf events are used to read these counters. + * + * Return: >= 0 on success. < 0 on failure. + */ +static float get_mem_bw_imc(int cpu_no, char *bw_report) +{ + float reads, writes, of_mul_read, of_mul_write; + int imc, j, ret; + + /* Start all iMC counters to log values (both read and write) */ + reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; + for (imc = 0; imc < imcs; imc++) { + for (j = 0; j < 2; j++) { + ret = open_perf_event(imc, cpu_no, j); + if (ret) + return -1; + } + for (j = 0; j < 2; j++) + membw_ioctl_perf_event_ioc_reset_enable(imc, j); + } + + sleep(1); + + /* Stop counters after a second to get results (both read and write) */ + for (imc = 0; imc < imcs; imc++) { + for (j = 0; j < 2; j++) + membw_ioctl_perf_event_ioc_disable(imc, j); + } + + /* + * Get results which are stored in struct type imc_counter_config + * Take over flow into consideration before calculating total b/w + */ + for (imc = 0; imc < imcs; imc++) { + struct imc_counter_config *r = + &imc_counters_config[imc][READ]; + struct imc_counter_config *w = + &imc_counters_config[imc][WRITE]; + + if (read(r->fd, &r->return_value, + sizeof(struct membw_read_format)) == -1) { + perror("Couldn't get read b/w through iMC"); + + return -1; + } + + if (read(w->fd, &w->return_value, + sizeof(struct membw_read_format)) == -1) { + perror("Couldn't get write bw through iMC"); + + return -1; + } + + __u64 r_time_enabled = r->return_value.time_enabled; + __u64 r_time_running = r->return_value.time_running; + + if (r_time_enabled != r_time_running) + of_mul_read = (float)r_time_enabled / + (float)r_time_running; + + __u64 w_time_enabled = w->return_value.time_enabled; + __u64 w_time_running = w->return_value.time_running; + + if (w_time_enabled != w_time_running) + of_mul_write = (float)w_time_enabled / + (float)w_time_running; + reads += r->return_value.value * of_mul_read * SCALE; + writes += w->return_value.value * of_mul_write * SCALE; + } + + for (imc = 0; imc < imcs; imc++) { + close(imc_counters_config[imc][READ].fd); + close(imc_counters_config[imc][WRITE].fd); + } + + if (strcmp(bw_report, "reads") == 0) + return reads; + + if (strcmp(bw_report, "writes") == 0) + return writes; + + return (reads + writes); +} + +void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id) +{ + if (ctrlgrp && mongrp) + sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH, + RESCTRL_PATH, ctrlgrp, mongrp, resource_id); + else if (!ctrlgrp && mongrp) + sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + mongrp, resource_id); + else if (ctrlgrp && !mongrp) + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + ctrlgrp, resource_id); + else if (!ctrlgrp && !mongrp) + sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + resource_id); +} + +/* + * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * @cpu_no: CPU number that the benchmark PID is binded to + * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + */ +static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, + int cpu_no, char *resctrl_val) +{ + int resource_id; + + if (get_resource_id(cpu_no, &resource_id) < 0) { + perror("Could not get resource_id"); + return; + } + + if (strcmp(resctrl_val, "mbm") == 0) + set_mbm_path(ctrlgrp, mongrp, resource_id); + + if ((strcmp(resctrl_val, "mba") == 0)) { + if (ctrlgrp) + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, + RESCTRL_PATH, ctrlgrp, resource_id); + else + sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, + RESCTRL_PATH, resource_id); + } +} + +/* + * Get MBM Local bytes as reported by resctrl FS + * For MBM, + * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp + * 2. If only con_mon grp is given, then read from con_mon grp + * 3. If both are not given, then read from root con_mon grp + * For MBA, + * 1. If con_mon grp is given, then read from it + * 2. If con_mon grp is not given, then read from root con_mon grp + */ +static unsigned long get_mem_bw_resctrl(void) +{ + unsigned long mbm_total = 0; + FILE *fp; + + fp = fopen(mbm_total_path, "r"); + if (!fp) { + perror("Failed to open total bw file"); + + return -1; + } + if (fscanf(fp, "%lu", &mbm_total) <= 0) { + perror("Could not get mbm local bytes"); + fclose(fp); + + return -1; + } + fclose(fp); + + return mbm_total; +} + +pid_t bm_pid, ppid; + +static void ctrlc_handler(int signum, siginfo_t *info, void *ptr) +{ + kill(bm_pid, SIGKILL); + printf("Ending\n\n"); + + exit(EXIT_SUCCESS); +} + +/* + * print_results_bw: the memory bandwidth results are stored in a file + * @filename: file that stores the results + * @bm_pid: child pid that runs benchmark + * @bw_imc: perf imc counter value + * @bw_resc: memory bandwidth value + * + * Return: 0 on success. non-zero on failure. + */ +static int print_results_bw(char *filename, int bm_pid, float bw_imc, + unsigned long bw_resc) +{ + unsigned long diff = fabs(bw_imc - bw_resc); + FILE *fp; + + if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { + printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc); + printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); + } else { + fp = fopen(filename, "a"); + if (!fp) { + perror("Cannot open results file"); + + return errno; + } + if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", + bm_pid, bw_imc, bw_resc, diff) <= 0) { + fclose(fp); + perror("Could not log results."); + + return errno; + } + fclose(fp); + } + + return 0; +} + +static int +measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) +{ + unsigned long bw_imc, bw_resc, bw_resc_end; + int ret; + + /* + * Measure memory bandwidth from resctrl and from + * another source which is perf imc value or could + * be something else if perf imc event is not available. + * Compare the two values to validate resctrl value. + * It takes 1sec to measure the data. + */ + bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report); + if (bw_imc <= 0) + return bw_imc; + + bw_resc_end = get_mem_bw_resctrl(); + if (bw_resc_end <= 0) + return bw_resc_end; + + bw_resc = (bw_resc_end - *bw_resc_start) / MB; + ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); + if (ret) + return ret; + + *bw_resc_start = bw_resc_end; + + return 0; +} + +/* + * resctrl_val: execute benchmark and measure memory bandwidth on + * the benchmark + * @benchmark_cmd: benchmark command and its arguments + * @param: parameters passed to resctrl_val() + * + * Return: 0 on success. non-zero on failure. + */ +int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) +{ + char *resctrl_val = param->resctrl_val; + unsigned long bw_resc_start = 0; + struct sigaction sigact; + int ret = 0, pipefd[2]; + char pipe_message = 0; + union sigval value; + + if (strcmp(param->filename, "") == 0) + sprintf(param->filename, "stdio"); + + if ((strcmp(resctrl_val, "mba")) == 0 || + (strcmp(resctrl_val, "mbm")) == 0) { + ret = validate_bw_report_request(param->bw_report); + if (ret) + return ret; + } + + ret = remount_resctrlfs(param->mum_resctrlfs); + if (ret) + return ret; + + /* + * If benchmark wasn't successfully started by child, then child should + * kill parent, so save parent's pid + */ + ppid = getpid(); + + if (pipe(pipefd)) { + perror("# Unable to create pipe"); + + return -1; + } + + /* + * Fork to start benchmark, save child's pid so that it can be killed + * when needed + */ + bm_pid = fork(); + if (bm_pid == -1) { + perror("# Unable to fork"); + + return -1; + } + + if (bm_pid == 0) { + /* + * Mask all signals except SIGUSR1, parent uses SIGUSR1 to + * start benchmark + */ + sigfillset(&sigact.sa_mask); + sigdelset(&sigact.sa_mask, SIGUSR1); + + sigact.sa_sigaction = run_benchmark; + sigact.sa_flags = SA_SIGINFO; + + /* Register for "SIGUSR1" signal from parent */ + if (sigaction(SIGUSR1, &sigact, NULL)) + PARENT_EXIT("Can't register child for signal"); + + /* Tell parent that child is ready */ + close(pipefd[0]); + pipe_message = 1; + if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < + sizeof(pipe_message)) { + perror("# failed signaling parent process"); + close(pipefd[1]); + return -1; + } + close(pipefd[1]); + + /* Suspend child until delivery of "SIGUSR1" from parent */ + sigsuspend(&sigact.sa_mask); + + PARENT_EXIT("Child is done"); + } + + printf("# benchmark PID: %d\n", bm_pid); + + /* + * Register CTRL-C handler for parent, as it has to kill benchmark + * before exiting + */ + sigact.sa_sigaction = ctrlc_handler; + sigemptyset(&sigact.sa_mask); + sigact.sa_flags = SA_SIGINFO; + if (sigaction(SIGINT, &sigact, NULL) || + sigaction(SIGHUP, &sigact, NULL)) { + perror("# sigaction"); + ret = errno; + goto out; + } + + value.sival_ptr = benchmark_cmd; + + /* Taskset benchmark to specified cpu */ + ret = taskset_benchmark(bm_pid, param->cpu_no); + if (ret) + goto out; + + /* Write benchmark to specified control&monitoring grp in resctrl FS */ + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, + resctrl_val); + if (ret) + goto out; + + if ((strcmp(resctrl_val, "mbm") == 0) || + (strcmp(resctrl_val, "mba") == 0)) { + ret = initialize_mem_bw_imc(); + if (ret) + goto out; + + initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, + param->cpu_no, resctrl_val); + } + + /* Parent waits for child to be ready. */ + close(pipefd[1]); + while (pipe_message != 1) { + if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) < + sizeof(pipe_message)) { + perror("# failed reading message from child process"); + close(pipefd[0]); + goto out; + } + } + close(pipefd[0]); + + /* Signal child to start benchmark */ + if (sigqueue(bm_pid, SIGUSR1, value) == -1) { + perror("# sigqueue SIGUSR1 to child"); + ret = errno; + goto out; + } + + /* Give benchmark enough time to fully run */ + sleep(1); + + /* Test runs until the callback setup() tells the test to stop. */ + while (1) { + if (strcmp(resctrl_val, "mbm") == 0) { + ret = param->setup(1, param); + if (ret) { + ret = 0; + break; + } + + ret = measure_vals(param, &bw_resc_start); + if (ret) + break; + } else if ((strcmp(resctrl_val, "mba") == 0)) { + ret = param->setup(1, param); + if (ret) { + ret = 0; + break; + } + + ret = measure_vals(param, &bw_resc_start); + if (ret) + break; + } else { + break; + } + } + +out: + kill(bm_pid, SIGKILL); + umount_resctrlfs(); + + return ret; +} -- cgit v1.2.3 From a2561b12fe392e0024e3187d325dbcbde7b99c04 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 16 Jan 2020 13:32:38 -0800 Subject: selftests/resctrl: Add built in benchmark Built-in benchmark fill_buf generates stressful memory bandwidth and cache traffic. Later it will be used as a default benchmark by various resctrl tests such as MBA (Memory Bandwidth Allocation) and MBM (Memory Bandwidth Monitoring) tests. Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/fill_buf.c | 207 +++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 tools/testing/selftests/resctrl/fill_buf.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c new file mode 100644 index 000000000000..b76bf6a4a0a5 --- /dev/null +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fill_buf benchmark + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "resctrl.h" + +#define CL_SIZE (64) +#define PAGE_SIZE (4 * 1024) +#define MB (1024 * 1024) + +static unsigned char *startptr; + +static void sb(void) +{ +#if defined(__i386) || defined(__x86_64) + asm volatile("sfence\n\t" + : : : "memory"); +#endif +} + +static void ctrl_handler(int signo) +{ + free(startptr); + printf("\nEnding\n"); + sb(); + exit(EXIT_SUCCESS); +} + +static void cl_flush(void *p) +{ +#if defined(__i386) || defined(__x86_64) + asm volatile("clflush (%0)\n\t" + : : "r"(p) : "memory"); +#endif +} + +static void mem_flush(void *p, size_t s) +{ + char *cp = (char *)p; + size_t i = 0; + + s = s / CL_SIZE; /* mem size in cache llines */ + + for (i = 0; i < s; i++) + cl_flush(&cp[i * CL_SIZE]); + + sb(); +} + +static void *malloc_and_init_memory(size_t s) +{ + uint64_t *p64; + size_t s64; + + void *p = memalign(PAGE_SIZE, s); + + p64 = (uint64_t *)p; + s64 = s / sizeof(uint64_t); + + while (s64 > 0) { + *p64 = (uint64_t)rand(); + p64 += (CL_SIZE / sizeof(uint64_t)); + s64 -= (CL_SIZE / sizeof(uint64_t)); + } + + return p; +} + +static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr) +{ + unsigned char sum, *p; + + sum = 0; + p = start_ptr; + while (p < end_ptr) { + sum += *p; + p += (CL_SIZE / 2); + } + + return sum; +} + +static +void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr) +{ + unsigned char *p; + + p = start_ptr; + while (p < end_ptr) { + *p = '1'; + p += (CL_SIZE / 2); + } +} + +static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, + char *resctrl_val) +{ + int ret = 0; + FILE *fp; + + while (1) + ret = fill_one_span_read(start_ptr, end_ptr); + + /* Consume read result so that reading memory is not optimized out. */ + fp = fopen("/dev/null", "w"); + if (!fp) + perror("Unable to write to /dev/null"); + fprintf(fp, "Sum: %d ", ret); + fclose(fp); + + return 0; +} + +static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, + char *resctrl_val) +{ + while (1) + fill_one_span_write(start_ptr, end_ptr); + + return 0; +} + +static int +fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush, + int op, char *resctrl_val) +{ + unsigned char *start_ptr, *end_ptr; + unsigned long long i; + int ret; + + if (malloc_and_init) + start_ptr = malloc_and_init_memory(buf_size); + else + start_ptr = malloc(buf_size); + + if (!start_ptr) + return -1; + + startptr = start_ptr; + end_ptr = start_ptr + buf_size; + + /* + * It's better to touch the memory once to avoid any compiler + * optimizations + */ + if (!malloc_and_init) { + for (i = 0; i < buf_size; i++) + *start_ptr++ = (unsigned char)rand(); + } + + start_ptr = startptr; + + /* Flush the memory before using to avoid "cache hot pages" effect */ + if (memflush) + mem_flush(start_ptr, buf_size); + + if (op == 0) + ret = fill_cache_read(start_ptr, end_ptr, resctrl_val); + else + ret = fill_cache_write(start_ptr, end_ptr, resctrl_val); + + if (ret) { + printf("\n Errror in fill cache read/write...\n"); + return -1; + } + + free(startptr); + + return 0; +} + +int run_fill_buf(unsigned long span, int malloc_and_init_memory, + int memflush, int op, char *resctrl_val) +{ + unsigned long long cache_size = span; + int ret; + + /* set up ctrl-c handler */ + if (signal(SIGINT, ctrl_handler) == SIG_ERR) + printf("Failed to catch SIGINT!\n"); + if (signal(SIGHUP, ctrl_handler) == SIG_ERR) + printf("Failed to catch SIGHUP!\n"); + + ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op, + resctrl_val); + if (ret) { + printf("\n Errror in fill cache\n"); + return -1; + } + + return 0; +} -- cgit v1.2.3 From ecdbb911f22d6cc5d422571dedf048b889d71417 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Jan 2020 13:32:39 -0800 Subject: selftests/resctrl: Add MBM test MBM (Memory Bandwidth Monitoring) test is the first implemented selftest. It starts a stressful memory bandwidth benchmark and assigns the bandwidth pid in a resctrl monitoring group. Read and compare perf IMC counter and MBM total bytes for the benchmark. The numbers should be close enough to pass the test. Default benchmark is built-in fill_buf. But users can specify their own benchmark by option "-b". We can add memory bandwidth monitoring for multiple processes in the future. Co-developed-by: Sai Praneeth Prakhya Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/Makefile | 7 +- tools/testing/selftests/resctrl/mbm_test.c | 145 ++++++++++++++++++++++++ tools/testing/selftests/resctrl/resctrl.h | 6 + tools/testing/selftests/resctrl/resctrl_tests.c | 132 +++++++++++++++++++++ tools/testing/selftests/resctrl/resctrl_val.c | 2 + tools/testing/selftests/resctrl/resctrlfs.c | 77 +++++++++++++ 6 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/resctrl/mbm_test.c create mode 100644 tools/testing/selftests/resctrl/resctrl_tests.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 76bbd6d3e4a8..d585cc1948cc 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -3,10 +3,15 @@ CFLAGS = -g -Wall SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) +all: resctrl_tests + $(OBJS): $(SRCS) $(CC) $(CFLAGS) -c $(SRCS) +resctrl_tests: $(OBJS) + $(CC) $(CFLAGS) -o $@ $^ + .PHONY: clean clean: - $(RM) $(OBJS) + $(RM) $(OBJS) resctrl_tests diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c new file mode 100644 index 000000000000..4700f7453f81 --- /dev/null +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory Bandwidth Monitoring (MBM) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" + +#define RESULT_FILE_NAME "result_mbm" +#define MAX_DIFF 300 +#define NUM_OF_RUNS 5 + +static void +show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) +{ + unsigned long avg_bw_imc = 0, avg_bw_resc = 0; + unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_diff = 0; + int runs; + + /* + * Discard the first value which is inaccurate due to monitoring setup + * transition phase. + */ + for (runs = 1; runs < NUM_OF_RUNS ; runs++) { + sum_bw_imc += bw_imc[runs]; + sum_bw_resc += bw_resc[runs]; + } + + avg_bw_imc = sum_bw_imc / 4; + avg_bw_resc = sum_bw_resc / 4; + avg_diff = avg_bw_resc - avg_bw_imc; + + printf("%sok MBM: diff within %d%%\n", + labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF); + tests_run++; + printf("# avg_diff: %lu\n", labs(avg_diff)); + printf("# Span (MB): %d\n", span); + printf("# avg_bw_imc: %lu\n", avg_bw_imc); + printf("# avg_bw_resc: %lu\n", avg_bw_resc); +} + +static int check_results(int span) +{ + unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS]; + char temp[1024], *token_array[8]; + char output[] = RESULT_FILE_NAME; + int runs; + FILE *fp; + + printf("# Checking for pass/fail\n"); + + fp = fopen(output, "r"); + if (!fp) { + perror(output); + + return errno; + } + + runs = 0; + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int i = 0; + + while (token) { + token_array[i++] = token; + token = strtok(NULL, ":\t"); + } + + bw_resc[runs] = strtoul(token_array[5], NULL, 0); + bw_imc[runs] = strtoul(token_array[3], NULL, 0); + runs++; + } + + show_bw_info(bw_imc, bw_resc, span); + + fclose(fp); + + return 0; +} + +static int mbm_setup(int num, ...) +{ + struct resctrl_val_param *p; + static int num_of_runs; + va_list param; + int ret = 0; + + /* Run NUM_OF_RUNS times */ + if (num_of_runs++ >= NUM_OF_RUNS) + return -1; + + va_start(param, num); + p = va_arg(param, struct resctrl_val_param *); + va_end(param); + + /* Set up shemata with 100% allocation on the first run. */ + if (num_of_runs == 0) + ret = write_schemata(p->ctrlgrp, "100", p->cpu_no, + p->resctrl_val); + + return ret; +} + +void mbm_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) +{ + struct resctrl_val_param param = { + .resctrl_val = "mbm", + .ctrlgrp = "c1", + .mongrp = "m1", + .span = span, + .cpu_no = cpu_no, + .mum_resctrlfs = 1, + .filename = RESULT_FILE_NAME, + .bw_report = bw_report, + .setup = mbm_setup + }; + int ret; + + remove(RESULT_FILE_NAME); + + if (!validate_resctrl_feature_request("mbm")) + return -1; + + ret = resctrl_val(benchmark_cmd, ¶m); + if (ret) + return ret; + + ret = check_results(span); + if (ret) + return ret; + + mbm_test_cleanup(); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index fb42087c904d..c92bc50c6751 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -57,7 +57,10 @@ struct resctrl_val_param { }; pid_t bm_pid, ppid; +int tests_run; +bool check_resctrlfs_support(void); +int filter_dmesg(void); int remount_resctrlfs(bool mum_resctrlfs); int get_resource_id(int cpu_no, int *resource_id); int umount_resctrlfs(void); @@ -75,5 +78,8 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush, int op, char *resctrl_va); int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); +int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd); +void tests_cleanup(void); +void mbm_test_cleanup(void); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c new file mode 100644 index 000000000000..496c8030fe43 --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Resctrl tests + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" + +#define BENCHMARK_ARGS 64 +#define BENCHMARK_ARG_SIZE 64 + +static void cmd_help(void) +{ + printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list]\n"); + printf("\t-b benchmark_cmd [options]: run specified benchmark\n"); + printf("\t default benchmark is builtin fill_buf\n"); + printf("\t-t test list: run tests specified in the test list, "); + printf("e.g. -t mbm,mba\n"); + printf("\t-h: help\n"); +} + +void tests_cleanup(void) +{ + mbm_test_cleanup(); +} + +int main(int argc, char **argv) +{ + int res, c, cpu_no = 1, span = 250, argc_new = argc, i, ben_ind; + char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; + char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; + bool has_ben = false, mbm_test = true; + int ben_count; + + for (i = 0; i < argc; i++) { + if (strcmp(argv[i], "-b") == 0) { + ben_ind = i + 1; + ben_count = argc - ben_ind; + argc_new = ben_ind - 1; + has_ben = true; + break; + } + } + + while ((c = getopt(argc_new, argv, "ht:b:")) != -1) { + char *token; + + switch (c) { + case 't': + token = strtok(optarg, ","); + + mbm_test = false; + while (token) { + if (!strcmp(token, "mbm")) { + mbm_test = true; + } else { + printf("invalid argument\n"); + + return -1; + } + token = strtok(NULL, ":\t"); + } + break; + case 'p': + cpu_no = atoi(optarg); + break; + case 'h': + cmd_help(); + + return 0; + default: + printf("invalid argument\n"); + + return -1; + } + } + + printf("TAP version 13\n"); + + /* + * Typically we need root privileges, because: + * 1. We write to resctrl FS + * 2. We execute perf commands + */ + if (geteuid() != 0) + printf("# WARNING: not running as root, tests may fail.\n"); + + if (has_ben) { + /* Extract benchmark command from command line. */ + for (i = ben_ind; i < argc; i++) { + benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i]; + sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]); + } + benchmark_cmd[ben_count] = NULL; + } else { + /* If no benchmark is given by "-b" argument, use fill_buf. */ + for (i = 0; i < 6; i++) + benchmark_cmd[i] = benchmark_cmd_area[i]; + + strcpy(benchmark_cmd[0], "fill_buf"); + sprintf(benchmark_cmd[1], "%d", span); + strcpy(benchmark_cmd[2], "1"); + strcpy(benchmark_cmd[3], "1"); + strcpy(benchmark_cmd[4], "0"); + strcpy(benchmark_cmd[5], ""); + benchmark_cmd[6] = NULL; + } + + sprintf(bw_report, "reads"); + sprintf(bm_type, "fill_buf"); + + check_resctrlfs_support(); + filter_dmesg(); + + if (mbm_test) { + printf("# Starting MBM BW change ...\n"); + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", "mba"); + res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); + printf("%sok MBM: bw change\n", res ? "not " : ""); + mbm_test_cleanup(); + tests_run++; + } + + printf("1..%d\n", tests_run); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 0d1bd03ec4be..4f7bd5a4d86e 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -435,6 +435,8 @@ pid_t bm_pid, ppid; static void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { kill(bm_pid, SIGKILL); + umount_resctrlfs(); + tests_cleanup(); printf("Ending\n\n"); exit(EXIT_SUCCESS); diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 1b125f9d8e5d..07c8394b427f 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -388,6 +388,41 @@ out: return ret; } +bool check_resctrlfs_support(void) +{ + FILE *inf = fopen("/proc/filesystems", "r"); + DIR *dp; + char *res; + bool ret = false; + + if (!inf) + return false; + + res = fgrep(inf, "nodev\tresctrl\n"); + + if (res) { + ret = true; + free(res); + } + + fclose(inf); + + printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not "); + tests_run++; + + dp = opendir(RESCTRL_PATH); + printf("%sok resctrl mountpoint \"%s\" exists\n", + dp ? "" : "not ", RESCTRL_PATH); + if (dp) + closedir(dp); + tests_run++; + + printf("# resctrl filesystem %s mounted\n", + find_resctrl_mount(NULL) ? "not" : "is"); + + return ret; +} + char *fgrep(FILE *inf, const char *str) { char line[256]; @@ -433,6 +468,48 @@ bool validate_resctrl_feature_request(char *resctrl_val) return found; } +int filter_dmesg(void) +{ + char line[1024]; + FILE *fp; + int pipefds[2]; + pid_t pid; + int ret; + + ret = pipe(pipefds); + if (ret) { + perror("pipe"); + return ret; + } + pid = fork(); + if (pid == 0) { + close(pipefds[0]); + dup2(pipefds[1], STDOUT_FILENO); + execlp("dmesg", "dmesg", NULL); + perror("executing dmesg"); + exit(1); + } + close(pipefds[1]); + fp = fdopen(pipefds[0], "r"); + if (!fp) { + perror("fdopen(pipe)"); + kill(pid, SIGTERM); + + return -1; + } + + while (fgets(line, 1024, fp)) { + if (strstr(line, "intel_rdt:")) + printf("# dmesg: %s", line); + if (strstr(line, "resctrl:")) + printf("# dmesg: %s", line); + } + fclose(fp); + waitpid(pid, NULL, 0); + + return 0; +} + int validate_bw_report_request(char *bw_report) { if (strcmp(bw_report, "reads") == 0) -- cgit v1.2.3 From 01fee6b4d1f93247e806dddb0065b88317949085 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Jan 2020 13:32:40 -0800 Subject: selftests/resctrl: Add MBA test MBA (Memory Bandwidth Allocation) test starts a stressful memory bandwidth benchmark and allocates memory bandwidth from 100% down to 10% for the benchmark process. For each allocation, compare perf IMC counter and mbm total bytes from resctrl. The difference between the two values should be within a threshold to pass the test. Default benchmark is built-in fill_buf. But users can specify their own benchmark by option "-b". We can add memory bandwidth allocation for multiple processes in the future. Co-developed-by: Sai Praneeth Prakhya Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/mba_test.c | 171 ++++++++++++++++++++++++ tools/testing/selftests/resctrl/resctrl.h | 2 + tools/testing/selftests/resctrl/resctrl_tests.c | 16 ++- 3 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/resctrl/mba_test.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c new file mode 100644 index 000000000000..7bf8eaa6204b --- /dev/null +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory Bandwidth Allocation (MBA) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" + +#define RESULT_FILE_NAME "result_mba" +#define NUM_OF_RUNS 5 +#define MAX_DIFF 300 +#define ALLOCATION_MAX 100 +#define ALLOCATION_MIN 10 +#define ALLOCATION_STEP 10 + +/* + * Change schemata percentage from 100 to 10%. Write schemata to specified + * con_mon grp, mon_grp in resctrl FS. + * For each allocation, run 5 times in order to get average values. + */ +static int mba_setup(int num, ...) +{ + static int runs_per_allocation, allocation = 100; + struct resctrl_val_param *p; + char allocation_str[64]; + va_list param; + + va_start(param, num); + p = va_arg(param, struct resctrl_val_param *); + va_end(param); + + if (runs_per_allocation >= NUM_OF_RUNS) + runs_per_allocation = 0; + + /* Only set up schemata once every NUM_OF_RUNS of allocations */ + if (runs_per_allocation++ != 0) + return 0; + + if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX) + return -1; + + sprintf(allocation_str, "%d", allocation); + + write_schemata(p->ctrlgrp, allocation_str, p->cpu_no, p->resctrl_val); + allocation -= ALLOCATION_STEP; + + return 0; +} + +static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) +{ + int allocation, runs; + bool failed = false; + + printf("# Results are displayed in (MB)\n"); + /* Memory bandwidth from 100% down to 10% */ + for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; + allocation++) { + unsigned long avg_bw_imc, avg_bw_resc; + unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + unsigned long avg_diff; + + /* + * The first run is discarded due to inaccurate value from + * phase transition. + */ + for (runs = NUM_OF_RUNS * allocation + 1; + runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) { + sum_bw_imc += bw_imc[runs]; + sum_bw_resc += bw_resc[runs]; + } + + avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); + avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); + avg_diff = labs((long)(avg_bw_resc - avg_bw_imc)); + + printf("%sok MBA schemata percentage %u smaller than %d %%\n", + avg_diff > MAX_DIFF ? "not " : "", + ALLOCATION_MAX - ALLOCATION_STEP * allocation, + MAX_DIFF); + tests_run++; + printf("# avg_diff: %lu\n", avg_diff); + printf("# avg_bw_imc: %lu\n", avg_bw_imc); + printf("# avg_bw_resc: %lu\n", avg_bw_resc); + if (avg_diff > MAX_DIFF) + failed = true; + } + + printf("%sok schemata change using MBA%s\n", failed ? "not " : "", + failed ? " # at least one test failed" : ""); + tests_run++; +} + +static int check_results(void) +{ + char *token_array[8], output[] = RESULT_FILE_NAME, temp[512]; + unsigned long bw_imc[1024], bw_resc[1024]; + int runs; + FILE *fp; + + fp = fopen(output, "r"); + if (!fp) { + perror(output); + + return errno; + } + + runs = 0; + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + + /* Field 3 is perf imc value */ + bw_imc[runs] = strtoul(token_array[3], NULL, 0); + /* Field 5 is resctrl value */ + bw_resc[runs] = strtoul(token_array[5], NULL, 0); + runs++; + } + + fclose(fp); + + show_mba_info(bw_imc, bw_resc); + + return 0; +} + +void mba_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) +{ + struct resctrl_val_param param = { + .resctrl_val = "mba", + .ctrlgrp = "c1", + .mongrp = "m1", + .cpu_no = cpu_no, + .mum_resctrlfs = 1, + .filename = RESULT_FILE_NAME, + .bw_report = bw_report, + .setup = mba_setup + }; + int ret; + + remove(RESULT_FILE_NAME); + + if (!validate_resctrl_feature_request("mba")) + return -1; + + ret = resctrl_val(benchmark_cmd, ¶m); + if (ret) + return ret; + + ret = check_results(); + if (ret) + return ret; + + mba_test_cleanup(); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index c92bc50c6751..a6196ab6b59f 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -81,5 +81,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); +int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); +void mba_test_cleanup(void); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 496c8030fe43..0ed953a97db2 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -26,6 +26,7 @@ static void cmd_help(void) void tests_cleanup(void) { mbm_test_cleanup(); + mba_test_cleanup(); } int main(int argc, char **argv) @@ -33,7 +34,7 @@ int main(int argc, char **argv) int res, c, cpu_no = 1, span = 250, argc_new = argc, i, ben_ind; char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; - bool has_ben = false, mbm_test = true; + bool has_ben = false, mbm_test = true, mba_test = true; int ben_count; for (i = 0; i < argc; i++) { @@ -54,9 +55,12 @@ int main(int argc, char **argv) token = strtok(optarg, ","); mbm_test = false; + mba_test = false; while (token) { if (!strcmp(token, "mbm")) { mbm_test = true; + } else if (!strcmp(token, "mba")) { + mba_test = true; } else { printf("invalid argument\n"); @@ -126,6 +130,16 @@ int main(int argc, char **argv) tests_run++; } + if (mba_test) { + printf("# Starting MBA Schemata change ...\n"); + if (!has_ben) + sprintf(benchmark_cmd[1], "%d", span); + res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); + printf("%sok MBA: schemata change\n", res ? "not " : ""); + mba_test_cleanup(); + tests_run++; + } + printf("1..%d\n", tests_run); return 0; -- cgit v1.2.3 From 78941183d1b151317beb37b25690b7d87fe2596d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Jan 2020 13:32:41 -0800 Subject: selftests/resctrl: Add Cache QoS Monitoring (CQM) selftest Cache QoS Monitoring (CQM) selftest starts stressful cache benchmark with specified size of memory to access the cache. Last Level cache occupancy reported by CQM should be close to the size of the memory. Co-developed-by: Sai Praneeth Prakhya Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/cache.c | 99 ++++++++++++ tools/testing/selftests/resctrl/cqm_test.c | 176 +++++++++++++++++++++ tools/testing/selftests/resctrl/resctrl.h | 16 ++ tools/testing/selftests/resctrl/resctrl_tests.c | 31 +++- tools/testing/selftests/resctrl/resctrl_val.c | 63 +++++++- tools/testing/selftests/resctrl/resctrlfs.c | 193 +++++++++++++++++++++++- 6 files changed, 558 insertions(+), 20 deletions(-) create mode 100644 tools/testing/selftests/resctrl/cache.c create mode 100644 tools/testing/selftests/resctrl/cqm_test.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c new file mode 100644 index 000000000000..d7719219ab51 --- /dev/null +++ b/tools/testing/selftests/resctrl/cache.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "resctrl.h" + +struct read_format { + __u64 nr; /* The number of events */ + struct { + __u64 value; /* The value of the event */ + } values[2]; +}; + +char llc_occup_path[1024]; + +/* + * Get LLC Occupancy as reported by RESCTRL FS + * For CQM, + * 1. If con_mon grp and mon grp given, then read from mon grp in + * con_mon grp + * 2. If only con_mon grp given, then read from con_mon grp + * 3. If both not given, then read from root con_mon grp + * For CAT, + * 1. If con_mon grp given, then read from it + * 2. If con_mon grp not given, then read from root con_mon grp + * + * Return: =0 on success. <0 on failure. + */ +static int get_llc_occu_resctrl(unsigned long *llc_occupancy) +{ + FILE *fp; + + fp = fopen(llc_occup_path, "r"); + if (!fp) { + perror("Failed to open results file"); + + return errno; + } + if (fscanf(fp, "%lu", llc_occupancy) <= 0) { + perror("Could not get llc occupancy"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * print_results_cache: the cache results are stored in a file + * @filename: file that stores the results + * @bm_pid: child pid that runs benchmark + * @llc_value: perf miss value / + * llc occupancy value reported by resctrl FS + * + * Return: 0 on success. non-zero on failure. + */ +static int print_results_cache(char *filename, int bm_pid, + unsigned long llc_value) +{ + FILE *fp; + + if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { + printf("Pid: %d \t LLC_value: %lu\n", bm_pid, + llc_value); + } else { + fp = fopen(filename, "a"); + if (!fp) { + perror("Cannot open results file"); + + return errno; + } + fprintf(fp, "Pid: %d \t llc_value: %lu\n", bm_pid, llc_value); + fclose(fp); + } + + return 0; +} + +int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) +{ + unsigned long llc_occu_resc = 0, llc_value = 0; + int ret; + + /* + * Measure llc occupancy from resctrl. + */ + if (!strcmp(param->resctrl_val, "cqm")) { + ret = get_llc_occu_resctrl(&llc_occu_resc); + if (ret < 0) + return ret; + llc_value = llc_occu_resc; + } + ret = print_results_cache(param->filename, bm_pid, llc_value); + if (ret) + return ret; + + return 0; +} diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cqm_test.c new file mode 100644 index 000000000000..c8756152bd61 --- /dev/null +++ b/tools/testing/selftests/resctrl/cqm_test.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cache Monitoring Technology (CQM) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" +#include + +#define RESULT_FILE_NAME "result_cqm" +#define NUM_OF_RUNS 5 +#define MAX_DIFF 2000000 +#define MAX_DIFF_PERCENT 15 + +int count_of_bits; +char cbm_mask[256]; +unsigned long long_mask; +unsigned long cache_size; + +static int cqm_setup(int num, ...) +{ + struct resctrl_val_param *p; + va_list param; + + va_start(param, num); + p = va_arg(param, struct resctrl_val_param *); + va_end(param); + + /* Run NUM_OF_RUNS times */ + if (p->num_of_runs >= NUM_OF_RUNS) + return -1; + + p->num_of_runs++; + + return 0; +} + +static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits, + unsigned long span) +{ + unsigned long avg_llc_occu_resc = 0; + float diff_percent; + long avg_diff = 0; + bool res; + + avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1); + avg_diff = (long)abs(span - avg_llc_occu_resc); + + diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100; + + if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) || + (abs(avg_diff) <= MAX_DIFF)) + res = true; + else + res = false; + + printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not", + MAX_DIFF, (int)MAX_DIFF_PERCENT); + + printf("# diff: %ld\n", avg_diff); + printf("# percent diff=%d\n", abs((int)diff_percent)); + printf("# Results are displayed in (Bytes)\n"); + printf("# Number of bits: %d\n", no_of_bits); + printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc); + printf("# llc_occu_exp (span): %lu\n", span); + + tests_run++; +} + +static int check_results(struct resctrl_val_param *param, int no_of_bits) +{ + char *token_array[8], temp[512]; + unsigned long sum_llc_occu_resc = 0; + int runs = 0; + FILE *fp; + + printf("# checking for pass/fail\n"); + fp = fopen(param->filename, "r"); + if (!fp) { + perror("# Error in opening file\n"); + + return errno; + } + + while (fgets(temp, 1024, fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + + /* Field 3 is llc occ resc value */ + if (runs > 0) + sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); + runs++; + } + fclose(fp); + show_cache_info(sum_llc_occu_resc, no_of_bits, param->span); + + return 0; +} + +void cqm_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) +{ + int ret, mum_resctrlfs; + + cache_size = 0; + mum_resctrlfs = 1; + + ret = remount_resctrlfs(mum_resctrlfs); + if (ret) + return ret; + + if (!validate_resctrl_feature_request("cqm")) + return -1; + + ret = get_cbm_mask("L3"); + if (ret) + return ret; + + long_mask = strtoul(cbm_mask, NULL, 16); + + ret = get_cache_size(cpu_no, "L3", &cache_size); + if (ret) + return ret; + printf("cache size :%lu\n", cache_size); + + count_of_bits = count_bits(long_mask); + + if (n < 1 || n > count_of_bits) { + printf("Invalid input value for numbr_of_bits n!\n"); + printf("Please Enter value in range 1 to %d\n", count_of_bits); + return -1; + } + + struct resctrl_val_param param = { + .resctrl_val = "cqm", + .ctrlgrp = "c1", + .mongrp = "m1", + .cpu_no = cpu_no, + .mum_resctrlfs = 0, + .filename = RESULT_FILE_NAME, + .mask = ~(long_mask << n) & long_mask, + .span = cache_size * n / count_of_bits, + .num_of_runs = 0, + .setup = cqm_setup, + }; + + if (strcmp(benchmark_cmd[0], "fill_buf") == 0) + sprintf(benchmark_cmd[1], "%lu", param.span); + + remove(RESULT_FILE_NAME); + + ret = resctrl_val(benchmark_cmd, ¶m); + if (ret) + return ret; + + ret = check_results(¶m, n); + if (ret) + return ret; + + cqm_test_cleanup(); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index a6196ab6b59f..d326c7e95862 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -18,12 +18,16 @@ #include #include #include +#include +#include +#include #include #include #define MB (1024 * 1024) #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" +#define CBM_MASK_PATH "/sys/fs/resctrl/info" #define PARENT_EXIT(err_msg) \ do { \ @@ -53,12 +57,16 @@ struct resctrl_val_param { int mum_resctrlfs; char filename[64]; char *bw_report; + unsigned long mask; + int num_of_runs; int (*setup)(int num, ...); }; pid_t bm_pid, ppid; int tests_run; +char llc_occup_path[1024]; + bool check_resctrlfs_support(void); int filter_dmesg(void); int remount_resctrlfs(bool mum_resctrlfs); @@ -83,5 +91,13 @@ void tests_cleanup(void); void mbm_test_cleanup(void); int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); void mba_test_cleanup(void); +int get_cbm_mask(char *cache_type); +int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); +void ctrlc_handler(int signum, siginfo_t *info, void *ptr); +int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd); +unsigned int count_bits(unsigned long n); +void cqm_test_cleanup(void); +int get_core_sibling(int cpu_no); +int measure_cache_vals(struct resctrl_val_param *param, int bm_pid); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 0ed953a97db2..70179a61df3b 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -15,11 +15,13 @@ static void cmd_help(void) { - printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list]\n"); - printf("\t-b benchmark_cmd [options]: run specified benchmark\n"); + printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n"); + printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM"); printf("\t default benchmark is builtin fill_buf\n"); printf("\t-t test list: run tests specified in the test list, "); - printf("e.g. -t mbm,mba\n"); + printf("e.g. -t mbm, mba, cqm\n"); + printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); + printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n"); printf("\t-h: help\n"); } @@ -27,15 +29,16 @@ void tests_cleanup(void) { mbm_test_cleanup(); mba_test_cleanup(); + cqm_test_cleanup(); } int main(int argc, char **argv) { - int res, c, cpu_no = 1, span = 250, argc_new = argc, i, ben_ind; + bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true; + int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5; char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; - bool has_ben = false, mbm_test = true, mba_test = true; - int ben_count; + int ben_ind, ben_count; for (i = 0; i < argc; i++) { if (strcmp(argv[i], "-b") == 0) { @@ -56,11 +59,14 @@ int main(int argc, char **argv) mbm_test = false; mba_test = false; + cqm_test = false; while (token) { if (!strcmp(token, "mbm")) { mbm_test = true; } else if (!strcmp(token, "mba")) { mba_test = true; + } else if (!strcmp(token, "cqm")) { + cqm_test = true; } else { printf("invalid argument\n"); @@ -72,6 +78,9 @@ int main(int argc, char **argv) case 'p': cpu_no = atoi(optarg); break; + case 'n': + no_of_bits = atoi(optarg); + break; case 'h': cmd_help(); @@ -140,6 +149,16 @@ int main(int argc, char **argv) tests_run++; } + if (cqm_test) { + printf("# Starting CQM test ...\n"); + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", "cqm"); + res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd); + printf("%sok CQM: test\n", res ? "not " : ""); + cqm_test_cleanup(); + tests_run++; + } + printf("1..%d\n", tests_run); return 0; diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 4f7bd5a4d86e..520fea3606d1 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -31,6 +31,18 @@ #define MBM_LOCAL_BYTES_PATH \ "%s/mon_data/mon_L3_%02d/mbm_local_bytes" +#define CON_MON_LCC_OCCUP_PATH \ + "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" + +#define CON_LCC_OCCUP_PATH \ + "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" + +#define MON_LCC_OCCUP_PATH \ + "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" + +#define LCC_OCCUP_PATH \ + "%s/mon_data/mon_L3_%02d/llc_occupancy" + struct membw_read_format { __u64 value; /* The value of the event */ __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ @@ -432,7 +444,7 @@ static unsigned long get_mem_bw_resctrl(void) pid_t bm_pid, ppid; -static void ctrlc_handler(int signum, siginfo_t *info, void *ptr) +void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { kill(bm_pid, SIGKILL); umount_resctrlfs(); @@ -480,6 +492,42 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } +static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num) +{ + if (strlen(ctrlgrp) && strlen(mongrp)) + sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, + ctrlgrp, mongrp, sock_num); + else if (!strlen(ctrlgrp) && strlen(mongrp)) + sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH, + mongrp, sock_num); + else if (strlen(ctrlgrp) && !strlen(mongrp)) + sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH, + ctrlgrp, sock_num); + else if (!strlen(ctrlgrp) && !strlen(mongrp)) + sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num); +} + +/* + * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path" + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * @cpu_no: CPU number that the benchmark PID is binded to + * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc) + */ +static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, + int cpu_no, char *resctrl_val) +{ + int resource_id; + + if (get_resource_id(cpu_no, &resource_id) < 0) { + perror("# Unable to resource_id"); + return; + } + + if (strcmp(resctrl_val, "cqm") == 0) + set_cqm_path(ctrlgrp, mongrp, resource_id); +} + static int measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) { @@ -634,7 +682,9 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); - } + } else if (strcmp(resctrl_val, "cqm") == 0) + initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, + param->cpu_no, resctrl_val); /* Parent waits for child to be ready. */ close(pipefd[1]); @@ -660,7 +710,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if (strcmp(resctrl_val, "mbm") == 0) { + if ((strcmp(resctrl_val, "mbm") == 0) || + (strcmp(resctrl_val, "mba") == 0)) { ret = param->setup(1, param); if (ret) { ret = 0; @@ -670,14 +721,14 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) ret = measure_vals(param, &bw_resc_start); if (ret) break; - } else if ((strcmp(resctrl_val, "mba") == 0)) { + } else if (strcmp(resctrl_val, "cqm") == 0) { ret = param->setup(1, param); if (ret) { ret = 0; break; } - - ret = measure_vals(param, &bw_resc_start); + sleep(1); + ret = measure_cache_vals(param, bm_pid); if (ret) break; } else { diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 07c8394b427f..9c050a6c1723 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -49,6 +49,8 @@ static int find_resctrl_mount(char *buffer) return -ENOENT; } +char cbm_mask[256]; + /* * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl * @mum_resctrlfs: Should the resctrl FS be remounted? @@ -130,6 +132,145 @@ int get_resource_id(int cpu_no, int *resource_id) return 0; } +/* + * get_cache_size - Get cache size for a specified CPU + * @cpu_no: CPU number + * @cache_type: Cache level L2/L3 + * @cache_size: pointer to cache_size + * + * Return: = 0 on success, < 0 on failure. + */ +int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size) +{ + char cache_path[1024], cache_str[64]; + int length, i, cache_num; + FILE *fp; + + if (!strcmp(cache_type, "L3")) { + cache_num = 3; + } else if (!strcmp(cache_type, "L2")) { + cache_num = 2; + } else { + perror("Invalid cache level"); + return -1; + } + + sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size", + cpu_no, cache_num); + fp = fopen(cache_path, "r"); + if (!fp) { + perror("Failed to open cache size"); + + return -1; + } + if (fscanf(fp, "%s", cache_str) <= 0) { + perror("Could not get cache_size"); + fclose(fp); + + return -1; + } + fclose(fp); + + length = (int)strlen(cache_str); + + *cache_size = 0; + + for (i = 0; i < length; i++) { + if ((cache_str[i] >= '0') && (cache_str[i] <= '9')) + + *cache_size = *cache_size * 10 + (cache_str[i] - '0'); + + else if (cache_str[i] == 'K') + + *cache_size = *cache_size * 1024; + + else if (cache_str[i] == 'M') + + *cache_size = *cache_size * 1024 * 1024; + + else + break; + } + + return 0; +} + +#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu" + +/* + * get_cbm_mask - Get cbm mask for given cache + * @cache_type: Cache level L2/L3 + * + * Mask is stored in cbm_mask which is global variable. + * + * Return: = 0 on success, < 0 on failure. + */ +int get_cbm_mask(char *cache_type) +{ + char cbm_mask_path[1024]; + FILE *fp; + + sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type); + + fp = fopen(cbm_mask_path, "r"); + if (!fp) { + perror("Failed to open cache level"); + + return -1; + } + if (fscanf(fp, "%s", cbm_mask) <= 0) { + perror("Could not get max cbm_mask"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * get_core_sibling - Get sibling core id from the same socket for given CPU + * @cpu_no: CPU number + * + * Return: > 0 on success, < 0 on failure. + */ +int get_core_sibling(int cpu_no) +{ + char core_siblings_path[1024], cpu_list_str[64]; + int sibling_cpu_no = -1; + FILE *fp; + + sprintf(core_siblings_path, "%s%d/topology/core_siblings_list", + CORE_SIBLINGS_PATH, cpu_no); + + fp = fopen(core_siblings_path, "r"); + if (!fp) { + perror("Failed to open core siblings path"); + + return -1; + } + if (fscanf(fp, "%s", cpu_list_str) <= 0) { + perror("Could not get core_siblings list"); + fclose(fp); + + return -1; + } + fclose(fp); + + char *token = strtok(cpu_list_str, "-,"); + + while (token) { + sibling_cpu_no = atoi(token); + /* Skipping core 0 as we don't want to run test on core 0 */ + if (sibling_cpu_no != 0) + break; + token = strtok(NULL, "-,"); + } + + return sibling_cpu_no; +} + /* * taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu * @bm_pid: PID that should be binded @@ -164,9 +305,10 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no) */ void run_benchmark(int signum, siginfo_t *info, void *ucontext) { - unsigned long span; - int operation, ret; + int operation, ret, malloc_and_init_memory, memflush; + unsigned long span, buffer_span; char **benchmark_cmd; + char resctrl_val[64]; FILE *fp; benchmark_cmd = info->si_ptr; @@ -182,8 +324,18 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext) if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { /* Execute default fill_buf benchmark */ span = strtoul(benchmark_cmd[1], NULL, 10); + malloc_and_init_memory = atoi(benchmark_cmd[2]); + memflush = atoi(benchmark_cmd[3]); operation = atoi(benchmark_cmd[4]); - if (run_fill_buf(span, 1, 1, operation, NULL)) + sprintf(resctrl_val, "%s", benchmark_cmd[5]); + + if (strcmp(resctrl_val, "cqm") != 0) + buffer_span = span * MB; + else + buffer_span = span; + + if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush, + operation, resctrl_val)) fprintf(stderr, "Error in running fill buffer\n"); } else { /* Execute specified benchmark */ @@ -210,6 +362,14 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) struct dirent *ep; DIR *dp; + /* + * At this point, we are guaranteed to have resctrl FS mounted and if + * length of grp_name == 0, it means, user wants to use root con_mon + * grp, so do nothing + */ + if (strlen(grp_name) == 0) + return 0; + /* Check if requested grp exists or not */ dp = opendir(parent_grp); if (dp) { @@ -293,9 +453,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" test */ - if ((strcmp(resctrl_val, "mbm") == 0)) { - if (mongrp) { + /* Create mon grp and write pid into it for "mbm" and "cqm" test */ + if ((strcmp(resctrl_val, "cqm") == 0) || + (strcmp(resctrl_val, "mbm") == 0)) { + if (strlen(mongrp)) { sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); ret = create_grp(mongrp, monitorgroup, monitorgroup_p); @@ -339,7 +500,8 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) int resource_id, ret = 0; FILE *fp; - if (strcmp(resctrl_val, "mba") != 0) + if ((strcmp(resctrl_val, "mba") != 0) && + (strcmp(resctrl_val, "cqm") != 0)) return -ENOENT; if (!schemata) { @@ -360,7 +522,10 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); - sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); + if (!strcmp(resctrl_val, "cqm")) + sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); + if (strcmp(resctrl_val, "mba") == 0) + sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); fp = fopen(controlgroup, "w"); if (!fp) { @@ -537,3 +702,15 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, group_fd, flags); return ret; } + +unsigned int count_bits(unsigned long n) +{ + unsigned int count = 0; + + while (n) { + count += n & 1; + n >>= 1; + } + + return count; +} -- cgit v1.2.3 From 790bf585b0eeec9aa0e680ba090142b98da7f948 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Jan 2020 13:32:42 -0800 Subject: selftests/resctrl: Add Cache Allocation Technology (CAT) selftest Cache Allocation Technology (CAT) selftest allocates a portion of last level cache and starts a benchmark to read each cache line in this portion of cache. Measure the cache misses in perf and the misses should be equal to the number of cache lines in this portion of cache. We don't use CQM to calculate cache usage because some CAT enabled platforms don't have CQM. Co-developed-by: Sai Praneeth Prakhya Signed-off-by: Sai Praneeth Prakhya Co-developed-by: Babu Moger Signed-off-by: Babu Moger Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/cache.c | 175 ++++++++++++++++- tools/testing/selftests/resctrl/cat_test.c | 250 ++++++++++++++++++++++++ tools/testing/selftests/resctrl/fill_buf.c | 10 +- tools/testing/selftests/resctrl/resctrl.h | 3 + tools/testing/selftests/resctrl/resctrl_tests.c | 15 +- tools/testing/selftests/resctrl/resctrlfs.c | 3 +- 6 files changed, 451 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/resctrl/cat_test.c (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index d7719219ab51..38dbf4962e33 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -10,8 +10,105 @@ struct read_format { } values[2]; }; +static struct perf_event_attr pea_llc_miss; +static struct read_format rf_cqm; +static int fd_lm; char llc_occup_path[1024]; +static void initialize_perf_event_attr(void) +{ + pea_llc_miss.type = PERF_TYPE_HARDWARE; + pea_llc_miss.size = sizeof(struct perf_event_attr); + pea_llc_miss.read_format = PERF_FORMAT_GROUP; + pea_llc_miss.exclude_kernel = 1; + pea_llc_miss.exclude_hv = 1; + pea_llc_miss.exclude_idle = 1; + pea_llc_miss.exclude_callchain_kernel = 1; + pea_llc_miss.inherit = 1; + pea_llc_miss.exclude_guest = 1; + pea_llc_miss.disabled = 1; +} + +static void ioctl_perf_event_ioc_reset_enable(void) +{ + ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0); + ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0); +} + +static int perf_event_open_llc_miss(pid_t pid, int cpu_no) +{ + fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1, + PERF_FLAG_FD_CLOEXEC); + if (fd_lm == -1) { + perror("Error opening leader"); + ctrlc_handler(0, NULL, NULL); + return -1; + } + + return 0; +} + +static int initialize_llc_perf(void) +{ + memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr)); + memset(&rf_cqm, 0, sizeof(struct read_format)); + + /* Initialize perf_event_attr structures for HW_CACHE_MISSES */ + initialize_perf_event_attr(); + + pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES; + + rf_cqm.nr = 1; + + return 0; +} + +static int reset_enable_llc_perf(pid_t pid, int cpu_no) +{ + int ret = 0; + + ret = perf_event_open_llc_miss(pid, cpu_no); + if (ret < 0) + return ret; + + /* Start counters to log values */ + ioctl_perf_event_ioc_reset_enable(); + + return 0; +} + +/* + * get_llc_perf: llc cache miss through perf events + * @cpu_no: CPU number that the benchmark PID is binded to + * + * Perf events like HW_CACHE_MISSES could be used to validate number of + * cache lines allocated. + * + * Return: =0 on success. <0 on failure. + */ +static int get_llc_perf(unsigned long *llc_perf_miss) +{ + __u64 total_misses; + + /* Stop counters after one span to get miss rate */ + + ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0); + + if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) { + perror("Could not get llc misses through perf"); + + return -1; + } + + total_misses = rf_cqm.values[0].value; + + close(fd_lm); + + *llc_perf_miss = total_misses; + + return 0; +} + /* * Get LLC Occupancy as reported by RESCTRL FS * For CQM, @@ -79,9 +176,19 @@ static int print_results_cache(char *filename, int bm_pid, int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) { - unsigned long llc_occu_resc = 0, llc_value = 0; + unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0; int ret; + /* + * Measure cache miss from perf. + */ + if (!strcmp(param->resctrl_val, "cat")) { + ret = get_llc_perf(&llc_perf_miss); + if (ret < 0) + return ret; + llc_value = llc_perf_miss; + } + /* * Measure llc occupancy from resctrl. */ @@ -97,3 +204,69 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) return 0; } + +/* + * cache_val: execute benchmark and measure LLC occupancy resctrl + * and perf cache miss for the benchmark + * @param: parameters passed to cache_val() + * + * Return: 0 on success. non-zero on failure. + */ +int cat_val(struct resctrl_val_param *param) +{ + int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0; + char *resctrl_val = param->resctrl_val; + pid_t bm_pid; + + if (strcmp(param->filename, "") == 0) + sprintf(param->filename, "stdio"); + + bm_pid = getpid(); + + /* Taskset benchmark to specified cpu */ + ret = taskset_benchmark(bm_pid, param->cpu_no); + if (ret) + return ret; + + /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, + resctrl_val); + if (ret) + return ret; + + if ((strcmp(resctrl_val, "cat") == 0)) { + ret = initialize_llc_perf(); + if (ret) + return ret; + } + + /* Test runs until the callback setup() tells the test to stop. */ + while (1) { + if (strcmp(resctrl_val, "cat") == 0) { + ret = param->setup(1, param); + if (ret) { + ret = 0; + break; + } + ret = reset_enable_llc_perf(bm_pid, param->cpu_no); + if (ret) + break; + + if (run_fill_buf(param->span, malloc_and_init_memory, + memflush, operation, resctrl_val)) { + fprintf(stderr, "Error-running fill buffer\n"); + ret = -1; + break; + } + + sleep(1); + ret = measure_cache_vals(param, bm_pid); + if (ret) + break; + } else { + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c new file mode 100644 index 000000000000..c194fcc42407 --- /dev/null +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cache Allocation Technology (CAT) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya , + * Fenghua Yu + */ +#include "resctrl.h" +#include + +#define RESULT_FILE_NAME1 "result_cat1" +#define RESULT_FILE_NAME2 "result_cat2" +#define NUM_OF_RUNS 5 +#define MAX_DIFF_PERCENT 4 +#define MAX_DIFF 1000000 + +int count_of_bits; +char cbm_mask[256]; +unsigned long long_mask; +unsigned long cache_size; + +/* + * Change schemata. Write schemata to specified + * con_mon grp, mon_grp in resctrl FS. + * Run 5 times in order to get average values. + */ +static int cat_setup(int num, ...) +{ + struct resctrl_val_param *p; + char schemata[64]; + va_list param; + int ret = 0; + + va_start(param, num); + p = va_arg(param, struct resctrl_val_param *); + va_end(param); + + /* Run NUM_OF_RUNS times */ + if (p->num_of_runs >= NUM_OF_RUNS) + return -1; + + if (p->num_of_runs == 0) { + sprintf(schemata, "%lx", p->mask); + ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no, + p->resctrl_val); + } + p->num_of_runs++; + + return ret; +} + +static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits, + unsigned long span) +{ + unsigned long allocated_cache_lines = span / 64; + unsigned long avg_llc_perf_miss = 0; + float diff_percent; + + avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1); + diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) / + allocated_cache_lines * 100; + + printf("%sok CAT: cache miss rate within %d%%\n", + abs((int)diff_percent) > MAX_DIFF_PERCENT ? "not " : "", + MAX_DIFF_PERCENT); + tests_run++; + printf("# Percent diff=%d\n", abs((int)diff_percent)); + printf("# Number of bits: %d\n", no_of_bits); + printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss); + printf("# Allocated cache lines: %lu\n", allocated_cache_lines); +} + +static int check_results(struct resctrl_val_param *param) +{ + char *token_array[8], temp[512]; + unsigned long sum_llc_perf_miss = 0; + int runs = 0, no_of_bits = 0; + FILE *fp; + + printf("# Checking for pass/fail\n"); + fp = fopen(param->filename, "r"); + if (!fp) { + perror("# Cannot open file"); + + return errno; + } + + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + /* + * Discard the first value which is inaccurate due to monitoring + * setup transition phase. + */ + if (runs > 0) + sum_llc_perf_miss += strtoul(token_array[3], NULL, 0); + runs++; + } + + fclose(fp); + no_of_bits = count_bits(param->mask); + + show_cache_info(sum_llc_perf_miss, no_of_bits, param->span); + + return 0; +} + +void cat_test_cleanup(void) +{ + remove(RESULT_FILE_NAME1); + remove(RESULT_FILE_NAME2); +} + +int cat_perf_miss_val(int cpu_no, int n, char *cache_type) +{ + unsigned long l_mask, l_mask_1; + int ret, pipefd[2], sibling_cpu_no; + char pipe_message; + pid_t bm_pid; + + cache_size = 0; + + ret = remount_resctrlfs(true); + if (ret) + return ret; + + if (!validate_resctrl_feature_request("cat")) + return -1; + + /* Get default cbm mask for L3/L2 cache */ + ret = get_cbm_mask(cache_type); + if (ret) + return ret; + + long_mask = strtoul(cbm_mask, NULL, 16); + + /* Get L3/L2 cache size */ + ret = get_cache_size(cpu_no, cache_type, &cache_size); + if (ret) + return ret; + printf("cache size :%lu\n", cache_size); + + /* Get max number of bits from default-cabm mask */ + count_of_bits = count_bits(long_mask); + + if (n < 1 || n > count_of_bits - 1) { + printf("Invalid input value for no_of_bits n!\n"); + printf("Please Enter value in range 1 to %d\n", + count_of_bits - 1); + return -1; + } + + /* Get core id from same socket for running another thread */ + sibling_cpu_no = get_core_sibling(cpu_no); + if (sibling_cpu_no < 0) + return -1; + + struct resctrl_val_param param = { + .resctrl_val = "cat", + .cpu_no = cpu_no, + .mum_resctrlfs = 0, + .setup = cat_setup, + }; + + l_mask = long_mask >> n; + l_mask_1 = ~l_mask & long_mask; + + /* Set param values for parent thread which will be allocated bitmask + * with (max_bits - n) bits + */ + param.span = cache_size * (count_of_bits - n) / count_of_bits; + strcpy(param.ctrlgrp, "c2"); + strcpy(param.mongrp, "m2"); + strcpy(param.filename, RESULT_FILE_NAME2); + param.mask = l_mask; + param.num_of_runs = 0; + + if (pipe(pipefd)) { + perror("# Unable to create pipe"); + return errno; + } + + bm_pid = fork(); + + /* Set param values for child thread which will be allocated bitmask + * with n bits + */ + if (bm_pid == 0) { + param.mask = l_mask_1; + strcpy(param.ctrlgrp, "c1"); + strcpy(param.mongrp, "m1"); + param.span = cache_size * n / count_of_bits; + strcpy(param.filename, RESULT_FILE_NAME1); + param.num_of_runs = 0; + param.cpu_no = sibling_cpu_no; + } + + remove(param.filename); + + ret = cat_val(¶m); + if (ret) + return ret; + + ret = check_results(¶m); + if (ret) + return ret; + + if (bm_pid == 0) { + /* Tell parent that child is ready */ + close(pipefd[0]); + pipe_message = 1; + if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < + sizeof(pipe_message)) { + close(pipefd[1]); + perror("# failed signaling parent process"); + return errno; + } + + close(pipefd[1]); + while (1) + ; + } else { + /* Parent waits for child to be ready. */ + close(pipefd[1]); + pipe_message = 0; + while (pipe_message != 1) { + if (read(pipefd[0], &pipe_message, + sizeof(pipe_message)) < sizeof(pipe_message)) { + perror("# failed reading from child process"); + break; + } + } + close(pipefd[0]); + kill(bm_pid, SIGKILL); + } + + cat_test_cleanup(); + if (bm_pid) + umount_resctrlfs(); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index b76bf6a4a0a5..84d2a8b9657a 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -113,8 +113,11 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, int ret = 0; FILE *fp; - while (1) + while (1) { ret = fill_one_span_read(start_ptr, end_ptr); + if (!strcmp(resctrl_val, "cat")) + break; + } /* Consume read result so that reading memory is not optimized out. */ fp = fopen("/dev/null", "w"); @@ -129,8 +132,11 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, char *resctrl_val) { - while (1) + while (1) { fill_one_span_write(start_ptr, end_ptr); + if (!strcmp(resctrl_val, "cat")) + break; + } return 0; } diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index d326c7e95862..e8399389f3ad 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -94,6 +94,9 @@ void mba_test_cleanup(void); int get_cbm_mask(char *cache_type); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); +int cat_val(struct resctrl_val_param *param); +void cat_test_cleanup(void); +int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd); unsigned int count_bits(unsigned long n); void cqm_test_cleanup(void); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 70179a61df3b..7f75b0b01bb5 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -19,7 +19,7 @@ static void cmd_help(void) printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM"); printf("\t default benchmark is builtin fill_buf\n"); printf("\t-t test list: run tests specified in the test list, "); - printf("e.g. -t mbm, mba, cqm\n"); + printf("e.g. -t mbm, mba, cqm, cat\n"); printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n"); printf("\t-h: help\n"); @@ -30,6 +30,7 @@ void tests_cleanup(void) mbm_test_cleanup(); mba_test_cleanup(); cqm_test_cleanup(); + cat_test_cleanup(); } int main(int argc, char **argv) @@ -39,6 +40,7 @@ int main(int argc, char **argv) char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; int ben_ind, ben_count; + bool cat_test = true; for (i = 0; i < argc; i++) { if (strcmp(argv[i], "-b") == 0) { @@ -60,6 +62,7 @@ int main(int argc, char **argv) mbm_test = false; mba_test = false; cqm_test = false; + cat_test = false; while (token) { if (!strcmp(token, "mbm")) { mbm_test = true; @@ -67,6 +70,8 @@ int main(int argc, char **argv) mba_test = true; } else if (!strcmp(token, "cqm")) { cqm_test = true; + } else if (!strcmp(token, "cat")) { + cat_test = true; } else { printf("invalid argument\n"); @@ -159,6 +164,14 @@ int main(int argc, char **argv) tests_run++; } + if (cat_test) { + printf("# Starting CAT test ...\n"); + res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); + printf("%sok CAT: test\n", res ? "not " : ""); + tests_run++; + cat_test_cleanup(); + } + printf("1..%d\n", tests_run); return 0; diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 9c050a6c1723..8772c8c4d5d0 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -501,6 +501,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) FILE *fp; if ((strcmp(resctrl_val, "mba") != 0) && + (strcmp(resctrl_val, "cat") != 0) && (strcmp(resctrl_val, "cqm") != 0)) return -ENOENT; @@ -522,7 +523,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); - if (!strcmp(resctrl_val, "cqm")) + if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm")) sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); if (strcmp(resctrl_val, "mba") == 0) sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); -- cgit v1.2.3 From 53f74fbec9f069241aa6f4c4d908229c77ee83e2 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Jan 2020 13:32:43 -0800 Subject: selftests/resctrl: Add vendor detection mechanism RESCTRL feature is supported both on Intel and AMD now. Some features are implemented differently. Add vendor detection mechanism. Use the vendor check where there are differences. Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/resctrl.h | 1 + tools/testing/selftests/resctrl/resctrl_tests.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index e8399389f3ad..39bf59c6b9c5 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -66,6 +66,7 @@ pid_t bm_pid, ppid; int tests_run; char llc_occup_path[1024]; +bool is_amd; bool check_resctrlfs_support(void); int filter_dmesg(void); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 7f75b0b01bb5..884e918b1e97 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -13,6 +13,27 @@ #define BENCHMARK_ARGS 64 #define BENCHMARK_ARG_SIZE 64 +bool is_amd; + +void detect_amd(void) +{ + FILE *inf = fopen("/proc/cpuinfo", "r"); + char *res; + + if (!inf) + return; + + res = fgrep(inf, "vendor_id"); + + if (res) { + char *s = strchr(res, ':'); + + is_amd = s && !strcmp(s, ": AuthenticAMD\n"); + free(res); + } + fclose(inf); +} + static void cmd_help(void) { printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n"); @@ -107,6 +128,9 @@ int main(int argc, char **argv) if (geteuid() != 0) printf("# WARNING: not running as root, tests may fail.\n"); + /* Detect AMD vendor */ + detect_amd(); + if (has_ben) { /* Extract benchmark command from command line. */ for (i = ben_ind; i < argc; i++) { -- cgit v1.2.3 From c0327e1d7c42adb31a9541de6812f740893177a1 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Jan 2020 13:32:44 -0800 Subject: selftests/resctrl: Use cache index3 id for AMD schemata masks AMD uses the cache l3 boundary for schemata masks. Update it accordigly. Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/resctrlfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 8772c8c4d5d0..19c0ec4045a4 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -113,8 +113,13 @@ int get_resource_id(int cpu_no, int *resource_id) char phys_pkg_path[1024]; FILE *fp; - sprintf(phys_pkg_path, "%s%d/topology/physical_package_id", - PHYS_ID_PATH, cpu_no); + if (is_amd) + sprintf(phys_pkg_path, "%s%d/cache/index3/id", + PHYS_ID_PATH, cpu_no); + else + sprintf(phys_pkg_path, "%s%d/topology/physical_package_id", + PHYS_ID_PATH, cpu_no); + fp = fopen(phys_pkg_path, "r"); if (!fp) { perror("Failed to open physical_package_id"); -- cgit v1.2.3 From 85f553d24ada5399cab730e3199a970e2cc78c82 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Jan 2020 13:32:45 -0800 Subject: selftests/resctrl: Disable MBA and MBM tests for AMD For now, disable MBA and MBM tests for AMD. Deciding test pass/fail is not clear right now. We can enable when we have some clarity. Signed-off-by: Babu Moger Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/cat_test.c | 4 ++-- tools/testing/selftests/resctrl/resctrl_tests.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index c194fcc42407..5da43767b973 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -64,8 +64,8 @@ static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits, allocated_cache_lines * 100; printf("%sok CAT: cache miss rate within %d%%\n", - abs((int)diff_percent) > MAX_DIFF_PERCENT ? "not " : "", - MAX_DIFF_PERCENT); + !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ? + "not " : "", MAX_DIFF_PERCENT); tests_run++; printf("# Percent diff=%d\n", abs((int)diff_percent)); printf("# Number of bits: %d\n", no_of_bits); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 884e918b1e97..425cc85ac883 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -158,7 +158,7 @@ int main(int argc, char **argv) check_resctrlfs_support(); filter_dmesg(); - if (mbm_test) { + if (!is_amd && mbm_test) { printf("# Starting MBM BW change ...\n"); if (!has_ben) sprintf(benchmark_cmd[5], "%s", "mba"); @@ -168,7 +168,7 @@ int main(int argc, char **argv) tests_run++; } - if (mba_test) { + if (!is_amd && mba_test) { printf("# Starting MBA Schemata change ...\n"); if (!has_ben) sprintf(benchmark_cmd[1], "%d", span); -- cgit v1.2.3 From 644592d328370af4b3e027b7b1ae9f81613782d8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 10 Feb 2020 12:32:38 -0600 Subject: objtool: Fail the kernel build on fatal errors When objtool encounters a fatal error, it usually means the binary is corrupt or otherwise broken in some way. Up until now, such errors were just treated as warnings which didn't fail the kernel build. However, objtool is now stable enough that if a fatal error is discovered, it most likely means something is seriously wrong and it should fail the kernel build. Note that this doesn't apply to "normal" objtool warnings; only fatal ones. Suggested-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Julien Thierry Link: https://lkml.kernel.org/r/f18c3743de0fef673d49dd35760f26bdef7f6fc3.1581359535.git.jpoimboe@redhat.com --- tools/objtool/check.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4768d91c6d68..796f6a172efd 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2491,8 +2491,14 @@ int check(const char *_objname, bool orc) out: cleanup(&file); - /* ignore warnings for now until we get all the code cleaned up */ - if (ret || warnings) - return 0; + if (ret < 0) { + /* + * Fatal error. The binary is corrupt or otherwise broken in + * some way, or objtool itself is broken. Fail the kernel + * build. + */ + return ret; + } + return 0; } -- cgit v1.2.3 From a22961409c02b93ffa7ed78f67fb57a1ba6c787d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 10 Feb 2020 12:32:39 -0600 Subject: objtool: Add is_static_jump() helper There are several places where objtool tests for a non-dynamic (aka direct) jump. Move the check to a helper function. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Julien Thierry Link: https://lkml.kernel.org/r/9b8b438df918276315e4765c60d2587f3c7ad698.1581359535.git.jpoimboe@redhat.com --- tools/objtool/check.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 796f6a172efd..9016ae1c0c55 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -97,14 +97,19 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_static_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_CONDITIONAL || + insn->type == INSN_JUMP_UNCONDITIONAL; +} + static bool is_sibling_call(struct instruction *insn) { /* An indirect jump is either a sibling call or a jump to a table. */ if (insn->type == INSN_JUMP_DYNAMIC) return list_empty(&insn->alts); - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) + if (!is_static_jump(insn)) return false; /* add_jump_destinations() sets insn->call_dest for sibling calls. */ @@ -553,8 +558,7 @@ static int add_jump_destinations(struct objtool_file *file) unsigned long dest_off; for_each_insn(file, insn) { - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) + if (!is_static_jump(insn)) continue; if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET) @@ -764,8 +768,7 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) + if (!is_static_jump(insn)) continue; if (!insn->immediate) -- cgit v1.2.3 From dc4197236c20e761f2007c641afd193f81a00a74 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 10 Feb 2020 12:32:40 -0600 Subject: objtool: Add relocation check for alternative sections Relocations in alternative code can be dangerous, because the code is copy/pasted to the text section after relocations have been resolved, which can corrupt PC-relative addresses. However, relocations might be acceptable in some cases, depending on the architecture. For example, the x86 alternatives code manually fixes up the target addresses for PC-relative jumps and calls. So disallow relocations in alternative code, except where the x86 arch code allows it. This code may need to be tweaked for other arches when objtool gets support for them. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Julien Thierry Link: https://lkml.kernel.org/r/7b90b68d093311e4e8f6b504a9e1c758fd7e0002.1581359535.git.jpoimboe@redhat.com --- tools/objtool/check.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9016ae1c0c55..b038de2ccd71 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -768,6 +768,27 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; + /* + * Since alternative replacement code is copy/pasted by the + * kernel after applying relocations, generally such code can't + * have relative-address relocation references to outside the + * .altinstr_replacement section, unless the arch's + * alternatives code can adjust the relative offsets + * accordingly. + * + * The x86 alternatives code adjusts the offsets only when it + * encounters a branch instruction at the very beginning of the + * replacement group. + */ + if ((insn->offset != special_alt->new_off || + (insn->type != INSN_CALL && !is_static_jump(insn))) && + find_rela_by_dest_range(insn->sec, insn->offset, insn->len)) { + + WARN_FUNC("unsupported relocation in alternatives section", + insn->sec, insn->offset); + return -1; + } + if (!is_static_jump(insn)) continue; -- cgit v1.2.3 From 33f0c47b8fb4724792b16351a32b24902a5d3b07 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Nov 2019 10:17:54 +0100 Subject: tools: gpio: implement gpio-watch Add a simple program that allows to test the new LINECHANGED_FD ioctl(). Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- tools/gpio/.gitignore | 1 + tools/gpio/Build | 1 + tools/gpio/Makefile | 11 +++++- tools/gpio/gpio-watch.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 tools/gpio/gpio-watch.c (limited to 'tools') diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore index a94c0e83b209..eab36c6d7751 100644 --- a/tools/gpio/.gitignore +++ b/tools/gpio/.gitignore @@ -1,4 +1,5 @@ gpio-event-mon gpio-hammer +gpio-watch lsgpio include/linux/gpio.h diff --git a/tools/gpio/Build b/tools/gpio/Build index 4141f35837db..67c7b7f6a717 100644 --- a/tools/gpio/Build +++ b/tools/gpio/Build @@ -2,3 +2,4 @@ gpio-utils-y += gpio-utils.o lsgpio-y += lsgpio.o gpio-utils.o gpio-hammer-y += gpio-hammer.o gpio-utils.o gpio-event-mon-y += gpio-event-mon.o gpio-utils.o +gpio-watch-y += gpio-watch.o diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 6080de58861f..842287e42c83 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -18,7 +18,7 @@ MAKEFLAGS += -r override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon +ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon gpio-watch ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) all: $(ALL_PROGRAMS) @@ -66,6 +66,15 @@ $(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +# +# gpio-watch +# +GPIO_WATCH_IN := $(OUTPUT)gpio-watch-in.o +$(GPIO_WATCH_IN): prepare FORCE + $(Q)$(MAKE) $(build)=gpio-watch +$(OUTPUT)gpio-watch: $(GPIO_WATCH_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + clean: rm -f $(ALL_PROGRAMS) rm -f $(OUTPUT)include/linux/gpio.h diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c new file mode 100644 index 000000000000..5cea24fddfa7 --- /dev/null +++ b/tools/gpio/gpio-watch.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * gpio-watch - monitor unrequested lines for property changes using the + * character device + * + * Copyright (C) 2019 BayLibre SAS + * Author: Bartosz Golaszewski + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + struct gpioline_info_changed chg; + struct gpioline_info req; + struct pollfd pfd; + int fd, i, j, ret; + char *event, *end; + ssize_t rd; + + if (argc < 3) + goto err_usage; + + fd = open(argv[1], O_RDWR | O_CLOEXEC); + if (fd < 0) { + perror("unable to open gpiochip"); + return EXIT_FAILURE; + } + + for (i = 0, j = 2; i < argc - 2; i++, j++) { + memset(&req, 0, sizeof(req)); + + req.line_offset = strtoul(argv[j], &end, 0); + if (*end != '\0') + goto err_usage; + + ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req); + if (ret) { + perror("unable to set up line watch"); + return EXIT_FAILURE; + } + } + + pfd.fd = fd; + pfd.events = POLLIN | POLLPRI; + + for (;;) { + ret = poll(&pfd, 1, 5000); + if (ret < 0) { + perror("error polling the linechanged fd"); + return EXIT_FAILURE; + } else if (ret > 0) { + memset(&chg, 0, sizeof(chg)); + rd = read(pfd.fd, &chg, sizeof(chg)); + if (rd < 0 || rd != sizeof(chg)) { + if (rd != sizeof(chg)) + errno = EIO; + + perror("error reading line change event"); + return EXIT_FAILURE; + } + + switch (chg.event_type) { + case GPIOLINE_CHANGED_REQUESTED: + event = "requested"; + break; + case GPIOLINE_CHANGED_RELEASED: + event = "released"; + break; + case GPIOLINE_CHANGED_CONFIG: + event = "config changed"; + break; + default: + fprintf(stderr, + "invalid event type received from the kernel\n"); + return EXIT_FAILURE; + } + + printf("line %u: %s at %llu\n", + chg.info.line_offset, event, chg.timestamp); + } + } + + return 0; + +err_usage: + printf("%s: ...\n", argv[0]); + return EXIT_FAILURE; +} -- cgit v1.2.3 From 04189382c0be4e045d63a38b47f012a8f6c35edc Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 24 Jan 2020 12:40:17 +0100 Subject: kselftest/cgroup: add cgroup destruction test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new test to verify that a cgroup with dead processes can be destroyed. The test spawns a child process which allocates and touches 100MB of RAM to ensure prolonged exit. Subsequently it kills the child, waits until the cgroup containing the child is empty and destroys the cgroup. Signed-off-by: Suren Baghdasaryan [mkoutny@suse.com: Fix typo in test_cgcore_destroy comment] Acked-by: Michal Koutný Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 113 +++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index e19ce940cd6a..8289d3f03de4 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -2,7 +2,10 @@ #include #include +#include +#include #include +#include #include #include #include @@ -12,6 +15,115 @@ #include "../kselftest.h" #include "cgroup_util.h" +static int touch_anon(char *buf, size_t size) +{ + int fd; + char *pos = buf; + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + return -1; + + while (size > 0) { + ssize_t ret = read(fd, pos, size); + + if (ret < 0) { + if (errno != EINTR) { + close(fd); + return -1; + } + } else { + pos += ret; + size -= ret; + } + } + close(fd); + + return 0; +} + +static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg) +{ + int ppid = getppid(); + size_t size = (size_t)arg; + void *buf; + + buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + 0, 0); + if (buf == MAP_FAILED) + return -1; + + if (touch_anon((char *)buf, size)) { + munmap(buf, size); + return -1; + } + + while (getppid() == ppid) + sleep(1); + + munmap(buf, size); + return 0; +} + +/* + * Create a child process that allocates and touches 100MB, then waits to be + * killed. Wait until the child is attached to the cgroup, kill all processes + * in that cgroup and wait until "cgroup.procs" is empty. At this point try to + * destroy the empty cgroup. The test helps detect race conditions between + * dying processes leaving the cgroup and cgroup destruction path. + */ +static int test_cgcore_destroy(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_test = NULL; + int child_pid; + char buf[PAGE_SIZE]; + + cg_test = cg_name(root, "cg_test"); + + if (!cg_test) + goto cleanup; + + for (int i = 0; i < 10; i++) { + if (cg_create(cg_test)) + goto cleanup; + + child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit, + (void *) MB(100)); + + if (child_pid < 0) + goto cleanup; + + /* wait for the child to enter cgroup */ + if (cg_wait_for_proc_count(cg_test, 1)) + goto cleanup; + + if (cg_killall(cg_test)) + goto cleanup; + + /* wait for cgroup to be empty */ + while (1) { + if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf))) + goto cleanup; + if (buf[0] == '\0') + break; + usleep(1000); + } + + if (rmdir(cg_test)) + goto cleanup; + + if (waitpid(child_pid, NULL, 0) < 0) + goto cleanup; + } + ret = KSFT_PASS; +cleanup: + if (cg_test) + cg_destroy(cg_test); + free(cg_test); + return ret; +} + /* * A(0) - B(0) - C(1) * \ D(0) @@ -512,6 +624,7 @@ struct corecg_test { T(test_cgcore_populated), T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), + T(test_cgcore_destroy), }; #undef T -- cgit v1.2.3 From 9bd5910d7f3db2f65be139d2679dd9daa4a3419a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 5 Feb 2020 14:26:23 +0100 Subject: selftests/cgroup: add tests for cloning into cgroups Expand the cgroup test-suite to include tests for CLONE_INTO_CGROUP. This adds the following tests: - CLONE_INTO_CGROUP manages to clone a process directly into a correctly delegated cgroup - CLONE_INTO_CGROUP fails to clone a process into a cgroup that has been removed after we've opened an fd to it - CLONE_INTO_CGROUP fails to clone a process into an invalid domain cgroup - CLONE_INTO_CGROUP adheres to the no internal process constraint - CLONE_INTO_CGROUP works with the freezer feature Cc: Tejun Heo Cc: Shuah Khan Cc: cgroups@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Acked-by: Roman Gushchin Signed-off-by: Christian Brauner Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/Makefile | 6 +- tools/testing/selftests/cgroup/cgroup_util.c | 126 ++++++++++++++++++++++ tools/testing/selftests/cgroup/cgroup_util.h | 4 + tools/testing/selftests/cgroup/test_core.c | 64 +++++++++++ tools/testing/selftests/clone3/clone3_selftests.h | 19 +++- 5 files changed, 214 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 66aafe1f5746..967f268fde74 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -11,6 +11,6 @@ TEST_GEN_PROGS += test_freezer include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 8f7131dcf1ff..8a637ca7d73a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -15,6 +15,7 @@ #include #include "cgroup_util.h" +#include "../clone3/clone3_selftests.h" static ssize_t read_text(const char *path, char *buf, size_t max_len) { @@ -331,12 +332,112 @@ int cg_run(const char *cgroup, } } +pid_t clone_into_cgroup(int cgroup_fd) +{ +#ifdef CLONE_ARGS_SIZE_VER2 + pid_t pid; + + struct clone_args args = { + .flags = CLONE_INTO_CGROUP, + .exit_signal = SIGCHLD, + .cgroup = cgroup_fd, + }; + + pid = sys_clone3(&args, sizeof(struct clone_args)); + /* + * Verify that this is a genuine test failure: + * ENOSYS -> clone3() not available + * E2BIG -> CLONE_INTO_CGROUP not available + */ + if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) + goto pretend_enosys; + + return pid; + +pretend_enosys: +#endif + errno = ENOSYS; + return -ENOSYS; +} + +int clone_reap(pid_t pid, int options) +{ + int ret; + siginfo_t info = { + .si_signo = 0, + }; + +again: + ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); + if (ret < 0) { + if (errno == EINTR) + goto again; + return -1; + } + + if (options & WEXITED) { + if (WIFEXITED(info.si_status)) + return WEXITSTATUS(info.si_status); + } + + if (options & WSTOPPED) { + if (WIFSTOPPED(info.si_status)) + return WSTOPSIG(info.si_status); + } + + if (options & WCONTINUED) { + if (WIFCONTINUED(info.si_status)) + return 0; + } + + return -1; +} + +int dirfd_open_opath(const char *dir) +{ + return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); +} + +#define close_prot_errno(fd) \ + if (fd >= 0) { \ + int _e_ = errno; \ + close(fd); \ + errno = _e_; \ + } + +static int clone_into_cgroup_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int cgroup_fd; + pid_t pid; + + cgroup_fd = dirfd_open_opath(cgroup); + if (cgroup_fd < 0) + return -1; + + pid = clone_into_cgroup(cgroup_fd); + close_prot_errno(cgroup_fd); + if (pid == 0) + exit(fn(cgroup, arg)); + + return pid; +} + int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg) { int pid; + pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); + if (pid > 0) + return pid; + + /* Genuine test failure. */ + if (pid < 0 && errno != ENOSYS) + return -1; + pid = fork(); if (pid == 0) { char buf[64]; @@ -450,3 +551,28 @@ int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) return strstr(buf, needle) ? 0 : -1; } + +int clone_into_cgroup_run_wait(const char *cgroup) +{ + int cgroup_fd; + pid_t pid; + + cgroup_fd = dirfd_open_opath(cgroup); + if (cgroup_fd < 0) + return -1; + + pid = clone_into_cgroup(cgroup_fd); + close_prot_errno(cgroup_fd); + if (pid < 0) + return -1; + + if (pid == 0) + exit(EXIT_SUCCESS); + + /* + * We don't care whether this fails. We only care whether the initial + * clone succeeded. + */ + (void)clone_reap(pid, WEXITED); + return 0; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 49c54fbdb229..5a1305dd1f0b 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -50,3 +50,7 @@ extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); +extern pid_t clone_into_cgroup(int cgroup_fd); +extern int clone_reap(pid_t pid, int options); +extern int clone_into_cgroup_run_wait(const char *cgroup); +extern int dirfd_open_opath(const char *dir); diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 8289d3f03de4..3df648c37876 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -137,8 +137,11 @@ cleanup: static int test_cgcore_populated(const char *root) { int ret = KSFT_FAIL; + int err; char *cg_test_a = NULL, *cg_test_b = NULL; char *cg_test_c = NULL, *cg_test_d = NULL; + int cgroup_fd = -EBADF; + pid_t pid; cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); @@ -190,6 +193,52 @@ static int test_cgcore_populated(const char *root) if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) goto cleanup; + /* Test that we can directly clone into a new cgroup. */ + cgroup_fd = dirfd_open_opath(cg_test_d); + if (cgroup_fd < 0) + goto cleanup; + + pid = clone_into_cgroup(cgroup_fd); + if (pid < 0) { + if (errno == ENOSYS) + goto cleanup_pass; + goto cleanup; + } + + if (pid == 0) { + if (raise(SIGSTOP)) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n"); + + (void)clone_reap(pid, WSTOPPED); + (void)kill(pid, SIGCONT); + (void)clone_reap(pid, WEXITED); + + if (err) + goto cleanup; + + if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) + goto cleanup; + + /* Remove cgroup. */ + if (cg_test_d) { + cg_destroy(cg_test_d); + free(cg_test_d); + cg_test_d = NULL; + } + + pid = clone_into_cgroup(cgroup_fd); + if (pid < 0) + goto cleanup_pass; + if (pid == 0) + exit(EXIT_SUCCESS); + (void)clone_reap(pid, WEXITED); + goto cleanup; + +cleanup_pass: ret = KSFT_PASS; cleanup: @@ -205,6 +254,8 @@ cleanup: free(cg_test_c); free(cg_test_b); free(cg_test_a); + if (cgroup_fd >= 0) + close(cgroup_fd); return ret; } @@ -248,6 +299,16 @@ static int test_cgcore_invalid_domain(const char *root) if (errno != EOPNOTSUPP) goto cleanup; + if (!clone_into_cgroup_run_wait(child)) + goto cleanup; + + if (errno == ENOSYS) + goto cleanup_pass; + + if (errno != EOPNOTSUPP) + goto cleanup; + +cleanup_pass: ret = KSFT_PASS; cleanup: @@ -457,6 +518,9 @@ static int test_cgcore_internal_process_constraint(const char *root) if (!cg_enter_current(parent)) goto cleanup; + if (!clone_into_cgroup_run_wait(parent)) + goto cleanup; + ret = KSFT_PASS; cleanup: diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index a3f2c8ad8bcc..91c1a78ddb39 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -5,12 +5,24 @@ #define _GNU_SOURCE #include +#include +#include #include #include -#include +#include + +#include "../kselftest.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +#ifndef CLONE_INTO_CGROUP +#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ +#endif + +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 +#endif + #ifndef __NR_clone3 #define __NR_clone3 -1 struct clone_args { @@ -22,10 +34,13 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; +#define CLONE_ARGS_SIZE_VER1 80 __aligned_u64 set_tid; __aligned_u64 set_tid_size; +#define CLONE_ARGS_SIZE_VER2 88 + __aligned_u64 cgroup; }; -#endif +#endif /* __NR_clone3 */ static pid_t sys_clone3(struct clone_args *args, size_t size) { -- cgit v1.2.3 From 020bd6c48ebd864d42b5b551a87a323e443918a6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 13 Feb 2020 12:16:05 +0800 Subject: spi: spidev_test: Remove break after exit statement When call print_usage() in parse_opts(), it will exit directly. Since break is not useful after exit statement, remove it. Signed-off-by: Tiezhu Yang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1581567368-8055-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Mark Brown --- tools/spi/spidev_test.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 3559e7646256..113b1e1d62ca 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -283,7 +283,6 @@ static void parse_opts(int argc, char *argv[]) break; default: print_usage(argv[0]); - break; } } if (mode & SPI_LOOP) { -- cgit v1.2.3 From 1f3c36328a487059beebd1f7be042e3b7abf7d34 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 13 Feb 2020 12:16:06 +0800 Subject: spi: spidev_test: Check input_tx and input_file first after parse options It is better to check input_tx and input_file first after parse options. Otherwise, it will do some useless operations when both -p and --input are selected. Signed-off-by: Tiezhu Yang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1581567368-8055-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Mark Brown --- tools/spi/spidev_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 113b1e1d62ca..5866178cdcc9 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -404,6 +404,9 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); + if (input_tx && input_file) + pabort("only one of -p and --input may be selected"); + fd = open(device, O_RDWR); if (fd < 0) pabort("can't open device"); @@ -445,9 +448,6 @@ int main(int argc, char *argv[]) printf("bits per word: %d\n", bits); printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000); - if (input_tx && input_file) - pabort("only one of -p and --input may be selected"); - if (input_tx) transfer_escaped_string(fd, input_tx); else if (input_file) -- cgit v1.2.3 From 470a072e12200576788dc622d58894609feae2d7 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 13 Feb 2020 12:16:07 +0800 Subject: spi: spidev_test: Use perror() only if errno is not 0 It is better to use perror() only if errno is not 0, it should use printf() when errno is 0, otherwise there exists redudant ": Success". E.g. without this patch: $ ./spidev_test -p 1234 --input test.bin only one of -p and --input may be selected: Success Aborted (core dumped) With this patch: $ ./spidev_test -p 1234 --input test.bin only one of -p and --input may be selected Aborted (core dumped) Signed-off-by: Tiezhu Yang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1581567368-8055-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Mark Brown --- tools/spi/spidev_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 5866178cdcc9..27967dd90f8f 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -26,7 +27,11 @@ static void pabort(const char *s) { - perror(s); + if (errno != 0) + perror(s); + else + printf("%s\n", s); + abort(); } -- cgit v1.2.3 From aea7afd9079f2e43b05790241d748ff0537ec917 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 13 Feb 2020 12:16:08 +0800 Subject: spi: spidev_test: Remove the whole "include" directory when make clean In the current code, it only removes "include/linux/spi/spidev.h" file when make clean and there still exists useless "include/linux/spi/" directory, just remove it. Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1581567368-8055-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Mark Brown --- tools/spi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/spi/Makefile b/tools/spi/Makefile index 5c342e655e55..2249a1546cc1 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -51,7 +51,7 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN) clean: rm -f $(ALL_PROGRAMS) - rm -f $(OUTPUT)include/linux/spi/spidev.h + rm -rf $(OUTPUT)include/ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete install: $(ALL_PROGRAMS) -- cgit v1.2.3 From 9de9f7d1cb143770e3e0faa8e35694922eb3066e Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Fri, 14 Feb 2020 12:48:02 +0100 Subject: tools: testing: vsock: Test when server is bound but not listening Whenever the server side of vsock is binding to the socket, but not listening yet, we expect the behavior from the client to be identical to what happens when the server is not even started. This new test runs the server side so that it binds to the socket without ever listening to it. The client side will try to connect and should receive an ECONNRESET error. This new test provides a way to validate the previously introduced patch for making sure the server side will always answer with a RST packet in case the client requested a new connection. Signed-off-by: Sebastien Boeuf Signed-off-by: David S. Miller --- tools/testing/vsock/vsock_test.c | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 1d8b93f1af31..5a4fb80fa832 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -55,6 +55,78 @@ static void test_stream_connection_reset(const struct test_opts *opts) close(fd); } +static void test_stream_bind_only_client(const struct test_opts *opts) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = opts->peer_cid, + }, + }; + int ret; + int fd; + + /* Wait for the server to be ready */ + control_expectln("BIND"); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret != -1) { + fprintf(stderr, "expected connect(2) failure, got %d\n", ret); + exit(EXIT_FAILURE); + } + if (errno != ECONNRESET) { + fprintf(stderr, "unexpected connect(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + + /* Notify the server that the client has finished */ + control_writeln("DONE"); + + close(fd); +} + +static void test_stream_bind_only_server(const struct test_opts *opts) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + /* Notify the client that the server is ready */ + control_writeln("BIND"); + + /* Wait for the client to finish */ + control_expectln("DONE"); + + close(fd); +} + static void test_stream_client_close_client(const struct test_opts *opts) { int fd; @@ -212,6 +284,11 @@ static struct test_case test_cases[] = { .name = "SOCK_STREAM connection reset", .run_client = test_stream_connection_reset, }, + { + .name = "SOCK_STREAM bind only", + .run_client = test_stream_bind_only_client, + .run_server = test_stream_bind_only_server, + }, { .name = "SOCK_STREAM client close", .run_client = test_stream_client_close_client, -- cgit v1.2.3 From 745a7ea72dc25ffe8c89a2a8d19022243dc8dadc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Feb 2020 16:29:36 +0200 Subject: selftests: mlxsw: Remove deprecated test The addition of a VLAN on a bridge slave prompts the driver to have the local port in question join the FID corresponding to this VLAN. Before recent changes, the operation of joining the FID would also mean that the driver would enable VXLAN tunneling if a VXLAN device was also member in the VLAN. In case the configuration of the VXLAN tunnel was not supported, an extack error would be returned. Since the operation of joining the FID no longer means that VXLAN tunneling is potentially enabled, the test is no longer relevant. Remove it. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 30 ---------------------- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 15 ----------- 2 files changed, 45 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index d72d8488a3b2..d9e02624c70b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -8,7 +8,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" netdev_pre_up_test vxlan_vlan_add_test - port_vlan_add_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -106,35 +105,6 @@ vxlan_vlan_add_test() ip link del dev br1 } -port_vlan_add_test() -{ - RET=0 - - ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 - - # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". - ip link add name vx1 up type vxlan id 1000 \ - local 192.0.2.17 remote 192.0.2.18 \ - dstport 4789 tos inherit ttl 100 - - ip link set dev $swp1 master br1 - check_err $? - - bridge vlan del dev $swp1 vid 1 - - ip link set dev vx1 master br1 - check_err $? - - bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? - - log_test "extack - map VLAN at port" - - ip link del dev vx1 - ip link del dev br1 -} - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 4632f51af7ab..f68a109c0352 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -854,21 +854,6 @@ sanitization_vlan_aware_test() bridge vlan del vid 10 dev vxlan20 bridge vlan add vid 20 dev vxlan20 pvid untagged - # Test that offloading of an unsupported tunnel fails when it is - # triggered by addition of VLAN to a local port - RET=0 - - # TOS must be set to inherit - ip link set dev vxlan10 type vxlan tos 42 - - ip link set dev $swp1 master br0 - bridge vlan add vid 10 dev $swp1 &> /dev/null - check_fail $? - - log_test "vlan-aware - failed vlan addition to a local port" - - ip link set dev vxlan10 type vxlan tos inherit - ip link del dev vxlan20 ip link del dev vxlan10 ip link del dev br0 -- cgit v1.2.3 From bdc58bea0d46153f09656a02acc05fba1ee58c50 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Feb 2020 16:29:37 +0200 Subject: selftests: mlxsw: extack: Test bridge creation with VXLAN Test that creation of a bridge (both VLAN-aware and VLAN-unaware) fails with an extack when a VXLAN device with an unsupported configuration is already enslaved to it. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index d9e02624c70b..d4e8e3359c02 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -8,6 +8,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" netdev_pre_up_test vxlan_vlan_add_test + vxlan_bridge_create_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -105,6 +106,37 @@ vxlan_vlan_add_test() ip link del dev br1 } +vxlan_bridge_create_test() +{ + RET=0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + # Test with VLAN-aware bridge. + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev vx1 master br1 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + # Test with VLAN-unaware bridge. + ip link set dev br1 type bridge vlan_filtering 0 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - bridge creation with VXLAN" + + ip link del dev br1 + ip link del dev vx1 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 6c4e61ff5f268f05e6660ace5f72c52cd4bc5b00 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Feb 2020 16:29:38 +0200 Subject: selftests: mlxsw: extack: Test creation of multiple VLAN-aware bridges The driver supports a single VLAN-aware bridge. Test that the enslavement of a port to the second VLAN-aware bridge fails with an extack. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index d4e8e3359c02..7a0a99c1d22f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -9,6 +9,7 @@ ALL_TESTS=" netdev_pre_up_test vxlan_vlan_add_test vxlan_bridge_create_test + bridge_create_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -137,6 +138,28 @@ vxlan_bridge_create_test() ip link del dev vx1 } +bridge_create_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br2 up type bridge vlan_filtering 1 + + ip link set dev $swp1 master br1 + check_err $? + + # Only one VLAN-aware bridge is supported, so this should fail with + # an extack. + ip link set dev $swp2 master br2 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - multiple VLAN-aware bridges creation" + + ip link del dev br2 + ip link del dev br1 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 58ba0238e98a3a2ed9d24697ee0b54b86245bc6b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Feb 2020 16:29:39 +0200 Subject: selftests: mlxsw: vxlan: Adjust test to recent changes After recent changes, the VXLAN tunnel will be offloaded regardless if any local ports are member in the FID or not. Adjust the test to make sure the tunnel is offloaded in this case. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index f68a109c0352..56b95fd414d6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -1049,11 +1049,9 @@ offload_indication_vlan_aware_l3vni_test() ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged - # No local port or router port is member in the VLAN, so tunnel should - # not be offloaded bridge fdb show brport vxlan0 | grep $zmac | grep self \ | grep -q offload - check_fail $? "vxlan tunnel offloaded when should not" + check_err $? "vxlan tunnel not offloaded when should" # Configure a VLAN interface and make sure tunnel is offloaded ip link add link br0 name br10 up type vlan id 10 -- cgit v1.2.3 From 495c3da648a1f1dd977db97e531dcb6b2a51e9a2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Feb 2020 16:29:40 +0200 Subject: selftests: mlxsw: vxlan: Add test for error path Test that when two VXLAN tunnels with conflicting configurations (i.e., different TTL) are enslaved to the same VLAN-aware bridge, then the enslavement of a port to the bridge is denied. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 56b95fd414d6..15eb0dc9a685 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -854,6 +854,26 @@ sanitization_vlan_aware_test() bridge vlan del vid 10 dev vxlan20 bridge vlan add vid 20 dev vxlan20 pvid untagged + # Test that when two VXLAN tunnels with conflicting configurations + # (i.e., different TTL) are enslaved to the same VLAN-aware bridge, + # then the enslavement of a port to the bridge is denied. + + # Use the offload indication of the local route to ensure the VXLAN + # configuration was correctly rollbacked. + ip address add 198.51.100.1/32 dev lo + + ip link set dev vxlan10 type vxlan ttl 10 + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vlan-aware - failed enslavement to bridge due to conflict" + + ip link set dev vxlan10 type vxlan ttl 20 + ip address del 198.51.100.1/32 dev lo + ip link del dev vxlan20 ip link del dev vxlan10 ip link del dev br0 -- cgit v1.2.3 From c0e71d602053e4e7637e4bc7d0bc9603ea77a33f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jan 2020 16:47:20 +0100 Subject: tools/testing/nvdimm: Fix compilation failure without CONFIG_DEV_DAX_PMEM_COMPAT When a kernel is configured without CONFIG_DEV_DAX_PMEM_COMPAT, the compilation of tools/testing/nvdimm fails with: Building modules, stage 2. MODPOST 11 modules ERROR: "dax_pmem_compat_test" [tools/testing/nvdimm/test/nfit_test.ko] undefined! Fix the problem by calling dax_pmem_compat_test() only if the kernel has the required functionality. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200123154720.12097-1-jack@suse.cz Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index bf6422a6af7f..a8ee5c4d41eb 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3164,7 +3164,9 @@ static __init int nfit_test_init(void) mcsafe_test(); dax_pmem_test(); dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT dax_pmem_compat_test(); +#endif nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); -- cgit v1.2.3 From e2c79ab7d75b4c6ed827e8078e5ebe2d059edafc Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Fri, 14 Feb 2020 18:41:34 +0100 Subject: tools/edid: Move EDID data sets from Documentation/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EDID files are not really documentation. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/EDID/1024x768.S | 43 ------- Documentation/EDID/1280x1024.S | 43 ------- Documentation/EDID/1600x1200.S | 43 ------- Documentation/EDID/1680x1050.S | 43 ------- Documentation/EDID/1920x1080.S | 43 ------- Documentation/EDID/800x600.S | 40 ------ Documentation/EDID/Makefile | 37 ------ Documentation/EDID/edid.S | 274 ----------------------------------------- Documentation/EDID/hex | 1 - tools/edid/1024x768.S | 43 +++++++ tools/edid/1280x1024.S | 43 +++++++ tools/edid/1600x1200.S | 43 +++++++ tools/edid/1680x1050.S | 43 +++++++ tools/edid/1920x1080.S | 43 +++++++ tools/edid/800x600.S | 40 ++++++ tools/edid/Makefile | 37 ++++++ tools/edid/edid.S | 274 +++++++++++++++++++++++++++++++++++++++++ tools/edid/hex | 1 + 18 files changed, 567 insertions(+), 567 deletions(-) delete mode 100644 Documentation/EDID/1024x768.S delete mode 100644 Documentation/EDID/1280x1024.S delete mode 100644 Documentation/EDID/1600x1200.S delete mode 100644 Documentation/EDID/1680x1050.S delete mode 100644 Documentation/EDID/1920x1080.S delete mode 100644 Documentation/EDID/800x600.S delete mode 100644 Documentation/EDID/Makefile delete mode 100644 Documentation/EDID/edid.S delete mode 100644 Documentation/EDID/hex create mode 100644 tools/edid/1024x768.S create mode 100644 tools/edid/1280x1024.S create mode 100644 tools/edid/1600x1200.S create mode 100644 tools/edid/1680x1050.S create mode 100644 tools/edid/1920x1080.S create mode 100644 tools/edid/800x600.S create mode 100644 tools/edid/Makefile create mode 100644 tools/edid/edid.S create mode 100644 tools/edid/hex (limited to 'tools') diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S deleted file mode 100644 index 4aed3f9ab88a..000000000000 --- a/Documentation/EDID/1024x768.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 65000 /* kHz */ -#define XPIX 1024 -#define YPIX 768 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 320 -#define YBLANK 38 -#define XOFFSET 8 -#define XPULSE 144 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux XGA" -#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */ -#define HSYNC_POL 0 -#define VSYNC_POL 0 - -#include "edid.S" diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S deleted file mode 100644 index b26dd424cad7..000000000000 --- a/Documentation/EDID/1280x1024.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 108000 /* kHz */ -#define XPIX 1280 -#define YPIX 1024 -#define XY_RATIO XY_RATIO_5_4 -#define XBLANK 408 -#define YBLANK 42 -#define XOFFSET 48 -#define XPULSE 112 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S deleted file mode 100644 index 0d091b282768..000000000000 --- a/Documentation/EDID/1600x1200.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor - - Copyright (C) 2013 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 162000 /* kHz */ -#define XPIX 1600 -#define YPIX 1200 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 560 -#define YBLANK 50 -#define XOFFSET 64 -#define XPULSE 192 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux UXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S deleted file mode 100644 index 7dfed9a33eab..000000000000 --- a/Documentation/EDID/1680x1050.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 146250 /* kHz */ -#define XPIX 1680 -#define YPIX 1050 -#define XY_RATIO XY_RATIO_16_10 -#define XBLANK 560 -#define YBLANK 39 -#define XOFFSET 104 -#define XPULSE 176 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux WSXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S deleted file mode 100644 index d6ffbba28e95..000000000000 --- a/Documentation/EDID/1920x1080.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 148500 /* kHz */ -#define XPIX 1920 -#define YPIX 1080 -#define XY_RATIO XY_RATIO_16_9 -#define XBLANK 280 -#define YBLANK 45 -#define XOFFSET 88 -#define XPULSE 44 -#define YOFFSET 4 -#define YPULSE 5 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux FHD" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S deleted file mode 100644 index a5616588de08..000000000000 --- a/Documentation/EDID/800x600.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - 800x600.S: EDID data set for standard 800x600 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - Copyright (C) 2014 Linaro Limited - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 40000 /* kHz */ -#define XPIX 800 -#define YPIX 600 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 256 -#define YBLANK 28 -#define XOFFSET 40 -#define XPULSE 128 -#define YOFFSET 1 -#define YPULSE 4 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SVGA" -#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/Makefile b/Documentation/EDID/Makefile deleted file mode 100644 index 85a927dfab02..000000000000 --- a/Documentation/EDID/Makefile +++ /dev/null @@ -1,37 +0,0 @@ - -SOURCES := $(wildcard [0-9]*x[0-9]*.S) - -BIN := $(patsubst %.S, %.bin, $(SOURCES)) - -IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES)) - -CODE := $(patsubst %.S, %.c, $(SOURCES)) - -all: $(BIN) $(IHEX) $(CODE) - -clean: - @rm -f *.o *.bin.ihex *.bin *.c - -%.o: %.S - @cc -c $^ - -%.bin.nocrc: %.o - @objcopy -Obinary $^ $@ - -%.crc: %.bin.nocrc - @list=$$(for i in `seq 1 127`; do head -c$$i $^ | tail -c1 \ - | hexdump -v -e '/1 "%02X+"'; done); \ - echo "ibase=16;100-($${list%?})%100" | bc >$@ - -%.p: %.crc %.S - @cc -c -DCRC="$$(cat $*.crc)" -o $@ $*.S - -%.bin: %.p - @objcopy -Obinary $^ $@ - -%.bin.ihex: %.p - @objcopy -Oihex $^ $@ - @dos2unix $@ 2>/dev/null - -%.c: %.bin - @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@ diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S deleted file mode 100644 index c3d13815526d..000000000000 --- a/Documentation/EDID/edid.S +++ /dev/null @@ -1,274 +0,0 @@ -/* - edid.S: EDID data template - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - - -/* Manufacturer */ -#define MFG_LNX1 'L' -#define MFG_LNX2 'N' -#define MFG_LNX3 'X' -#define SERIAL 0 -#define YEAR 2012 -#define WEEK 5 - -/* EDID 1.3 standard definitions */ -#define XY_RATIO_16_10 0b00 -#define XY_RATIO_4_3 0b01 -#define XY_RATIO_5_4 0b10 -#define XY_RATIO_16_9 0b11 - -/* Provide defaults for the timing bits */ -#ifndef ESTABLISHED_TIMING1_BITS -#define ESTABLISHED_TIMING1_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING2_BITS -#define ESTABLISHED_TIMING2_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING3_BITS -#define ESTABLISHED_TIMING3_BITS 0x00 -#endif - -#define mfgname2id(v1,v2,v3) \ - ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f)) -#define swap16(v1) ((v1>>8)+((v1&0xff)<<8)) -#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f)) -#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f)) -#define msbs4(v1,v2,v3,v4) \ - ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\ - (((v3>>4)&0x03)<<2)+((v4>>4)&0x03)) -#define pixdpi2mm(pix,dpi) ((pix*25)/dpi) -#define xsize pixdpi2mm(XPIX,DPI) -#define ysize pixdpi2mm(YPIX,DPI) - - .data - -/* Fixed header pattern */ -header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00 - -mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3)) - -prod_code: .hword 0 - -/* Serial number. 32 bits, little endian. */ -serial_number: .long SERIAL - -/* Week of manufacture */ -week: .byte WEEK - -/* Year of manufacture, less 1990. (1990-2245) - If week=255, it is the model year instead */ -year: .byte YEAR-1990 - -version: .byte VERSION /* EDID version, usually 1 (for 1.3) */ -revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */ - -/* If Bit 7=1 Digital input. If set, the following bit definitions apply: - Bits 6-1 Reserved, must be 0 - Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB, - 1 pixel per clock, up to 8 bits per color, MSB aligned, - If Bit 7=0 Analog input. If clear, the following bit definitions apply: - Bits 6-5 Video white and sync levels, relative to blank - 00=+0.7/-0.3 V; 01=+0.714/-0.286 V; - 10=+1.0/-0.4 V; 11=+0.7/0 V - Bit 4 Blank-to-black setup (pedestal) expected - Bit 3 Separate sync supported - Bit 2 Composite sync (on HSync) supported - Bit 1 Sync on green supported - Bit 0 VSync pulse must be serrated when somposite or - sync-on-green is used. */ -video_parms: .byte 0x6d - -/* Maximum horizontal image size, in centimetres - (max 292 cm/115 in at 16:9 aspect ratio) */ -max_hor_size: .byte xsize/10 - -/* Maximum vertical image size, in centimetres. - If either byte is 0, undefined (e.g. projector) */ -max_vert_size: .byte ysize/10 - -/* Display gamma, minus 1, times 100 (range 1.00-3.5 */ -gamma: .byte 120 - -/* Bit 7 DPMS standby supported - Bit 6 DPMS suspend supported - Bit 5 DPMS active-off supported - Bits 4-3 Display type: 00=monochrome; 01=RGB colour; - 10=non-RGB multicolour; 11=undefined - Bit 2 Standard sRGB colour space. Bytes 25-34 must contain - sRGB standard values. - Bit 1 Preferred timing mode specified in descriptor block 1. - Bit 0 GTF supported with default parameter values. */ -dsp_features: .byte 0xea - -/* Chromaticity coordinates. */ -/* Red and green least-significant bits - Bits 7-6 Red x value least-significant 2 bits - Bits 5-4 Red y value least-significant 2 bits - Bits 3-2 Green x value lst-significant 2 bits - Bits 1-0 Green y value least-significant 2 bits */ -red_green_lsb: .byte 0x5e - -/* Blue and white least-significant 2 bits */ -blue_white_lsb: .byte 0xc0 - -/* Red x value most significant 8 bits. - 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */ -red_x_msb: .byte 0xa4 - -/* Red y value most significant 8 bits */ -red_y_msb: .byte 0x59 - -/* Green x and y value most significant 8 bits */ -green_x_y_msb: .byte 0x4a,0x98 - -/* Blue x and y value most significant 8 bits */ -blue_x_y_msb: .byte 0x25,0x20 - -/* Default white point x and y value most significant 8 bits */ -white_x_y_msb: .byte 0x50,0x54 - -/* Established timings */ -/* Bit 7 720x400 @ 70 Hz - Bit 6 720x400 @ 88 Hz - Bit 5 640x480 @ 60 Hz - Bit 4 640x480 @ 67 Hz - Bit 3 640x480 @ 72 Hz - Bit 2 640x480 @ 75 Hz - Bit 1 800x600 @ 56 Hz - Bit 0 800x600 @ 60 Hz */ -estbl_timing1: .byte ESTABLISHED_TIMING1_BITS - -/* Bit 7 800x600 @ 72 Hz - Bit 6 800x600 @ 75 Hz - Bit 5 832x624 @ 75 Hz - Bit 4 1024x768 @ 87 Hz, interlaced (1024x768) - Bit 3 1024x768 @ 60 Hz - Bit 2 1024x768 @ 72 Hz - Bit 1 1024x768 @ 75 Hz - Bit 0 1280x1024 @ 75 Hz */ -estbl_timing2: .byte ESTABLISHED_TIMING2_BITS - -/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II) - Bits 6-0 Other manufacturer-specific display mod */ -estbl_timing3: .byte ESTABLISHED_TIMING3_BITS - -/* Standard timing */ -/* X resolution, less 31, divided by 8 (256-2288 pixels) */ -std_xres: .byte (XPIX/8)-31 -/* Y resolution, X:Y pixel ratio - Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9. - Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */ -std_vres: .byte (XY_RATIO<<6)+VFREQ-60 - .fill 7,2,0x0101 /* Unused */ - -descriptor1: -/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */ -clock: .hword CLOCK/10 - -/* Horizontal active pixels 8 lsbits (0-4095) */ -x_act_lsb: .byte XPIX&0xff -/* Horizontal blanking pixels 8 lsbits (0-4095) - End of active to start of next active. */ -x_blk_lsb: .byte XBLANK&0xff -/* Bits 7-4 Horizontal active pixels 4 msbits - Bits 3-0 Horizontal blanking pixels 4 msbits */ -x_msbs: .byte msbs2(XPIX,XBLANK) - -/* Vertical active lines 8 lsbits (0-4095) */ -y_act_lsb: .byte YPIX&0xff -/* Vertical blanking lines 8 lsbits (0-4095) */ -y_blk_lsb: .byte YBLANK&0xff -/* Bits 7-4 Vertical active lines 4 msbits - Bits 3-0 Vertical blanking lines 4 msbits */ -y_msbs: .byte msbs2(YPIX,YBLANK) - -/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */ -x_snc_off_lsb: .byte XOFFSET&0xff -/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */ -x_snc_pls_lsb: .byte XPULSE&0xff -/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63) - Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */ -y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE) -/* Bits 7-6 Horizontal sync offset pixels 2 msbits - Bits 5-4 Horizontal sync pulse width pixels 2 msbits - Bits 3-2 Vertical sync offset lines 2 msbits - Bits 1-0 Vertical sync pulse width lines 2 msbits */ -xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE) - -/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -x_dsp_size: .byte xsize&0xff - -/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -y_dsp_size: .byte ysize&0xff - -/* Bits 7-4 Horizontal display size, mm, 4 msbits - Bits 3-0 Vertical display size, mm, 4 msbits */ -dsp_size_mbsb: .byte msbs2(xsize,ysize) - -/* Horizontal border pixels (each side; total is twice this) */ -x_border: .byte 0 -/* Vertical border lines (each side; total is twice this) */ -y_border: .byte 0 - -/* Bit 7 Interlaced - Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0: - Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar, - sync=1 during left; 11=4-way interleaved stereo - Bit 0=1 2-way interleaved stereo: 01=Right image on even lines; - 10=Left image on even lines; 11=side-by-side - Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite; - 10=Digital composite (on HSync); 11=Digital separate - Bit 2 If digital separate: Vertical sync polarity (1=positive) - Other types: VSync serrated (HSync during VSync) - Bit 1 If analog sync: Sync on all 3 RGB lines (else green only) - Digital: HSync polarity (1=positive) - Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */ -features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1) - -descriptor2: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xff /* Descriptor is monitor serial number (text) */ - .byte 0 /* Must be zero */ -start1: .ascii "Linux #0" -end1: .byte 0x0a /* End marker */ - .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */ -descriptor3: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfd /* Descriptor is monitor range limits */ - .byte 0 /* Must be zero */ -start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */ - .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */ - .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up - to 10 MHz multiple (10-2550 MHz) */ - .byte 0 /* No extended timing information type */ -end2: .byte 0x0a /* End marker */ - .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */ -descriptor4: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfc /* Descriptor is text */ - .byte 0 /* Must be zero */ -start3: .ascii TIMING_NAME -end3: .byte 0x0a /* End marker */ - .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */ -extensions: .byte 0 /* Number of extensions to follow */ -checksum: .byte CRC /* Sum of all bytes must be 0 */ diff --git a/Documentation/EDID/hex b/Documentation/EDID/hex deleted file mode 100644 index 8873ebb618af..000000000000 --- a/Documentation/EDID/hex +++ /dev/null @@ -1 +0,0 @@ -"\t" 8/1 "0x%02x, " "\n" diff --git a/tools/edid/1024x768.S b/tools/edid/1024x768.S new file mode 100644 index 000000000000..4aed3f9ab88a --- /dev/null +++ b/tools/edid/1024x768.S @@ -0,0 +1,43 @@ +/* + 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor + + Copyright (C) 2011 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 65000 /* kHz */ +#define XPIX 1024 +#define YPIX 768 +#define XY_RATIO XY_RATIO_4_3 +#define XBLANK 320 +#define YBLANK 38 +#define XOFFSET 8 +#define XPULSE 144 +#define YOFFSET 3 +#define YPULSE 6 +#define DPI 72 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux XGA" +#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */ +#define HSYNC_POL 0 +#define VSYNC_POL 0 + +#include "edid.S" diff --git a/tools/edid/1280x1024.S b/tools/edid/1280x1024.S new file mode 100644 index 000000000000..b26dd424cad7 --- /dev/null +++ b/tools/edid/1280x1024.S @@ -0,0 +1,43 @@ +/* + 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor + + Copyright (C) 2011 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 108000 /* kHz */ +#define XPIX 1280 +#define YPIX 1024 +#define XY_RATIO XY_RATIO_5_4 +#define XBLANK 408 +#define YBLANK 42 +#define XOFFSET 48 +#define XPULSE 112 +#define YOFFSET 1 +#define YPULSE 3 +#define DPI 72 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux SXGA" +/* No ESTABLISHED_TIMINGx_BITS */ +#define HSYNC_POL 1 +#define VSYNC_POL 1 + +#include "edid.S" diff --git a/tools/edid/1600x1200.S b/tools/edid/1600x1200.S new file mode 100644 index 000000000000..0d091b282768 --- /dev/null +++ b/tools/edid/1600x1200.S @@ -0,0 +1,43 @@ +/* + 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor + + Copyright (C) 2013 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 162000 /* kHz */ +#define XPIX 1600 +#define YPIX 1200 +#define XY_RATIO XY_RATIO_4_3 +#define XBLANK 560 +#define YBLANK 50 +#define XOFFSET 64 +#define XPULSE 192 +#define YOFFSET 1 +#define YPULSE 3 +#define DPI 72 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux UXGA" +/* No ESTABLISHED_TIMINGx_BITS */ +#define HSYNC_POL 1 +#define VSYNC_POL 1 + +#include "edid.S" diff --git a/tools/edid/1680x1050.S b/tools/edid/1680x1050.S new file mode 100644 index 000000000000..7dfed9a33eab --- /dev/null +++ b/tools/edid/1680x1050.S @@ -0,0 +1,43 @@ +/* + 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor + + Copyright (C) 2012 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 146250 /* kHz */ +#define XPIX 1680 +#define YPIX 1050 +#define XY_RATIO XY_RATIO_16_10 +#define XBLANK 560 +#define YBLANK 39 +#define XOFFSET 104 +#define XPULSE 176 +#define YOFFSET 3 +#define YPULSE 6 +#define DPI 96 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux WSXGA" +/* No ESTABLISHED_TIMINGx_BITS */ +#define HSYNC_POL 1 +#define VSYNC_POL 1 + +#include "edid.S" diff --git a/tools/edid/1920x1080.S b/tools/edid/1920x1080.S new file mode 100644 index 000000000000..d6ffbba28e95 --- /dev/null +++ b/tools/edid/1920x1080.S @@ -0,0 +1,43 @@ +/* + 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor + + Copyright (C) 2012 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 148500 /* kHz */ +#define XPIX 1920 +#define YPIX 1080 +#define XY_RATIO XY_RATIO_16_9 +#define XBLANK 280 +#define YBLANK 45 +#define XOFFSET 88 +#define XPULSE 44 +#define YOFFSET 4 +#define YPULSE 5 +#define DPI 96 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux FHD" +/* No ESTABLISHED_TIMINGx_BITS */ +#define HSYNC_POL 1 +#define VSYNC_POL 1 + +#include "edid.S" diff --git a/tools/edid/800x600.S b/tools/edid/800x600.S new file mode 100644 index 000000000000..a5616588de08 --- /dev/null +++ b/tools/edid/800x600.S @@ -0,0 +1,40 @@ +/* + 800x600.S: EDID data set for standard 800x600 60 Hz monitor + + Copyright (C) 2011 Carsten Emde + Copyright (C) 2014 Linaro Limited + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +/* EDID */ +#define VERSION 1 +#define REVISION 3 + +/* Display */ +#define CLOCK 40000 /* kHz */ +#define XPIX 800 +#define YPIX 600 +#define XY_RATIO XY_RATIO_4_3 +#define XBLANK 256 +#define YBLANK 28 +#define XOFFSET 40 +#define XPULSE 128 +#define YOFFSET 1 +#define YPULSE 4 +#define DPI 72 +#define VFREQ 60 /* Hz */ +#define TIMING_NAME "Linux SVGA" +#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */ +#define HSYNC_POL 1 +#define VSYNC_POL 1 + +#include "edid.S" diff --git a/tools/edid/Makefile b/tools/edid/Makefile new file mode 100644 index 000000000000..85a927dfab02 --- /dev/null +++ b/tools/edid/Makefile @@ -0,0 +1,37 @@ + +SOURCES := $(wildcard [0-9]*x[0-9]*.S) + +BIN := $(patsubst %.S, %.bin, $(SOURCES)) + +IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES)) + +CODE := $(patsubst %.S, %.c, $(SOURCES)) + +all: $(BIN) $(IHEX) $(CODE) + +clean: + @rm -f *.o *.bin.ihex *.bin *.c + +%.o: %.S + @cc -c $^ + +%.bin.nocrc: %.o + @objcopy -Obinary $^ $@ + +%.crc: %.bin.nocrc + @list=$$(for i in `seq 1 127`; do head -c$$i $^ | tail -c1 \ + | hexdump -v -e '/1 "%02X+"'; done); \ + echo "ibase=16;100-($${list%?})%100" | bc >$@ + +%.p: %.crc %.S + @cc -c -DCRC="$$(cat $*.crc)" -o $@ $*.S + +%.bin: %.p + @objcopy -Obinary $^ $@ + +%.bin.ihex: %.p + @objcopy -Oihex $^ $@ + @dos2unix $@ 2>/dev/null + +%.c: %.bin + @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@ diff --git a/tools/edid/edid.S b/tools/edid/edid.S new file mode 100644 index 000000000000..c3d13815526d --- /dev/null +++ b/tools/edid/edid.S @@ -0,0 +1,274 @@ +/* + edid.S: EDID data template + + Copyright (C) 2012 Carsten Emde + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + + +/* Manufacturer */ +#define MFG_LNX1 'L' +#define MFG_LNX2 'N' +#define MFG_LNX3 'X' +#define SERIAL 0 +#define YEAR 2012 +#define WEEK 5 + +/* EDID 1.3 standard definitions */ +#define XY_RATIO_16_10 0b00 +#define XY_RATIO_4_3 0b01 +#define XY_RATIO_5_4 0b10 +#define XY_RATIO_16_9 0b11 + +/* Provide defaults for the timing bits */ +#ifndef ESTABLISHED_TIMING1_BITS +#define ESTABLISHED_TIMING1_BITS 0x00 +#endif +#ifndef ESTABLISHED_TIMING2_BITS +#define ESTABLISHED_TIMING2_BITS 0x00 +#endif +#ifndef ESTABLISHED_TIMING3_BITS +#define ESTABLISHED_TIMING3_BITS 0x00 +#endif + +#define mfgname2id(v1,v2,v3) \ + ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f)) +#define swap16(v1) ((v1>>8)+((v1&0xff)<<8)) +#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f)) +#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f)) +#define msbs4(v1,v2,v3,v4) \ + ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\ + (((v3>>4)&0x03)<<2)+((v4>>4)&0x03)) +#define pixdpi2mm(pix,dpi) ((pix*25)/dpi) +#define xsize pixdpi2mm(XPIX,DPI) +#define ysize pixdpi2mm(YPIX,DPI) + + .data + +/* Fixed header pattern */ +header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00 + +mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3)) + +prod_code: .hword 0 + +/* Serial number. 32 bits, little endian. */ +serial_number: .long SERIAL + +/* Week of manufacture */ +week: .byte WEEK + +/* Year of manufacture, less 1990. (1990-2245) + If week=255, it is the model year instead */ +year: .byte YEAR-1990 + +version: .byte VERSION /* EDID version, usually 1 (for 1.3) */ +revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */ + +/* If Bit 7=1 Digital input. If set, the following bit definitions apply: + Bits 6-1 Reserved, must be 0 + Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB, + 1 pixel per clock, up to 8 bits per color, MSB aligned, + If Bit 7=0 Analog input. If clear, the following bit definitions apply: + Bits 6-5 Video white and sync levels, relative to blank + 00=+0.7/-0.3 V; 01=+0.714/-0.286 V; + 10=+1.0/-0.4 V; 11=+0.7/0 V + Bit 4 Blank-to-black setup (pedestal) expected + Bit 3 Separate sync supported + Bit 2 Composite sync (on HSync) supported + Bit 1 Sync on green supported + Bit 0 VSync pulse must be serrated when somposite or + sync-on-green is used. */ +video_parms: .byte 0x6d + +/* Maximum horizontal image size, in centimetres + (max 292 cm/115 in at 16:9 aspect ratio) */ +max_hor_size: .byte xsize/10 + +/* Maximum vertical image size, in centimetres. + If either byte is 0, undefined (e.g. projector) */ +max_vert_size: .byte ysize/10 + +/* Display gamma, minus 1, times 100 (range 1.00-3.5 */ +gamma: .byte 120 + +/* Bit 7 DPMS standby supported + Bit 6 DPMS suspend supported + Bit 5 DPMS active-off supported + Bits 4-3 Display type: 00=monochrome; 01=RGB colour; + 10=non-RGB multicolour; 11=undefined + Bit 2 Standard sRGB colour space. Bytes 25-34 must contain + sRGB standard values. + Bit 1 Preferred timing mode specified in descriptor block 1. + Bit 0 GTF supported with default parameter values. */ +dsp_features: .byte 0xea + +/* Chromaticity coordinates. */ +/* Red and green least-significant bits + Bits 7-6 Red x value least-significant 2 bits + Bits 5-4 Red y value least-significant 2 bits + Bits 3-2 Green x value lst-significant 2 bits + Bits 1-0 Green y value least-significant 2 bits */ +red_green_lsb: .byte 0x5e + +/* Blue and white least-significant 2 bits */ +blue_white_lsb: .byte 0xc0 + +/* Red x value most significant 8 bits. + 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */ +red_x_msb: .byte 0xa4 + +/* Red y value most significant 8 bits */ +red_y_msb: .byte 0x59 + +/* Green x and y value most significant 8 bits */ +green_x_y_msb: .byte 0x4a,0x98 + +/* Blue x and y value most significant 8 bits */ +blue_x_y_msb: .byte 0x25,0x20 + +/* Default white point x and y value most significant 8 bits */ +white_x_y_msb: .byte 0x50,0x54 + +/* Established timings */ +/* Bit 7 720x400 @ 70 Hz + Bit 6 720x400 @ 88 Hz + Bit 5 640x480 @ 60 Hz + Bit 4 640x480 @ 67 Hz + Bit 3 640x480 @ 72 Hz + Bit 2 640x480 @ 75 Hz + Bit 1 800x600 @ 56 Hz + Bit 0 800x600 @ 60 Hz */ +estbl_timing1: .byte ESTABLISHED_TIMING1_BITS + +/* Bit 7 800x600 @ 72 Hz + Bit 6 800x600 @ 75 Hz + Bit 5 832x624 @ 75 Hz + Bit 4 1024x768 @ 87 Hz, interlaced (1024x768) + Bit 3 1024x768 @ 60 Hz + Bit 2 1024x768 @ 72 Hz + Bit 1 1024x768 @ 75 Hz + Bit 0 1280x1024 @ 75 Hz */ +estbl_timing2: .byte ESTABLISHED_TIMING2_BITS + +/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II) + Bits 6-0 Other manufacturer-specific display mod */ +estbl_timing3: .byte ESTABLISHED_TIMING3_BITS + +/* Standard timing */ +/* X resolution, less 31, divided by 8 (256-2288 pixels) */ +std_xres: .byte (XPIX/8)-31 +/* Y resolution, X:Y pixel ratio + Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9. + Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */ +std_vres: .byte (XY_RATIO<<6)+VFREQ-60 + .fill 7,2,0x0101 /* Unused */ + +descriptor1: +/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */ +clock: .hword CLOCK/10 + +/* Horizontal active pixels 8 lsbits (0-4095) */ +x_act_lsb: .byte XPIX&0xff +/* Horizontal blanking pixels 8 lsbits (0-4095) + End of active to start of next active. */ +x_blk_lsb: .byte XBLANK&0xff +/* Bits 7-4 Horizontal active pixels 4 msbits + Bits 3-0 Horizontal blanking pixels 4 msbits */ +x_msbs: .byte msbs2(XPIX,XBLANK) + +/* Vertical active lines 8 lsbits (0-4095) */ +y_act_lsb: .byte YPIX&0xff +/* Vertical blanking lines 8 lsbits (0-4095) */ +y_blk_lsb: .byte YBLANK&0xff +/* Bits 7-4 Vertical active lines 4 msbits + Bits 3-0 Vertical blanking lines 4 msbits */ +y_msbs: .byte msbs2(YPIX,YBLANK) + +/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */ +x_snc_off_lsb: .byte XOFFSET&0xff +/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */ +x_snc_pls_lsb: .byte XPULSE&0xff +/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63) + Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */ +y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE) +/* Bits 7-6 Horizontal sync offset pixels 2 msbits + Bits 5-4 Horizontal sync pulse width pixels 2 msbits + Bits 3-2 Vertical sync offset lines 2 msbits + Bits 1-0 Vertical sync pulse width lines 2 msbits */ +xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE) + +/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */ +x_dsp_size: .byte xsize&0xff + +/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */ +y_dsp_size: .byte ysize&0xff + +/* Bits 7-4 Horizontal display size, mm, 4 msbits + Bits 3-0 Vertical display size, mm, 4 msbits */ +dsp_size_mbsb: .byte msbs2(xsize,ysize) + +/* Horizontal border pixels (each side; total is twice this) */ +x_border: .byte 0 +/* Vertical border lines (each side; total is twice this) */ +y_border: .byte 0 + +/* Bit 7 Interlaced + Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0: + Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar, + sync=1 during left; 11=4-way interleaved stereo + Bit 0=1 2-way interleaved stereo: 01=Right image on even lines; + 10=Left image on even lines; 11=side-by-side + Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite; + 10=Digital composite (on HSync); 11=Digital separate + Bit 2 If digital separate: Vertical sync polarity (1=positive) + Other types: VSync serrated (HSync during VSync) + Bit 1 If analog sync: Sync on all 3 RGB lines (else green only) + Digital: HSync polarity (1=positive) + Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */ +features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1) + +descriptor2: .byte 0,0 /* Not a detailed timing descriptor */ + .byte 0 /* Must be zero */ + .byte 0xff /* Descriptor is monitor serial number (text) */ + .byte 0 /* Must be zero */ +start1: .ascii "Linux #0" +end1: .byte 0x0a /* End marker */ + .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */ +descriptor3: .byte 0,0 /* Not a detailed timing descriptor */ + .byte 0 /* Must be zero */ + .byte 0xfd /* Descriptor is monitor range limits */ + .byte 0 /* Must be zero */ +start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */ + .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */ + .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate + (1-255 kHz) */ + .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate + (1-255 kHz) */ + .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up + to 10 MHz multiple (10-2550 MHz) */ + .byte 0 /* No extended timing information type */ +end2: .byte 0x0a /* End marker */ + .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */ +descriptor4: .byte 0,0 /* Not a detailed timing descriptor */ + .byte 0 /* Must be zero */ + .byte 0xfc /* Descriptor is text */ + .byte 0 /* Must be zero */ +start3: .ascii TIMING_NAME +end3: .byte 0x0a /* End marker */ + .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */ +extensions: .byte 0 /* Number of extensions to follow */ +checksum: .byte CRC /* Sum of all bytes must be 0 */ diff --git a/tools/edid/hex b/tools/edid/hex new file mode 100644 index 000000000000..8873ebb618af --- /dev/null +++ b/tools/edid/hex @@ -0,0 +1 @@ +"\t" 8/1 "0x%02x, " "\n" -- cgit v1.2.3 From d1ba1204f2eec134937cb32997ee47756d448aa2 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:30 +0000 Subject: selftests/bpf: Test unhashing kTLS socket after removing from map When a TCP socket gets inserted into a sockmap, its sk_prot callbacks get replaced with tcp_bpf callbacks built from regular tcp callbacks. If TLS gets enabled on the same socket, sk_prot callbacks get replaced once again, this time with kTLS callbacks built from tcp_bpf callbacks. Now, we allow removing a socket from a sockmap that has kTLS enabled. After removal, socket remains with kTLS configured. This is where things things get tricky. Since the socket has a set of sk_prot callbacks that are a mix of kTLS and tcp_bpf callbacks, we need to restore just the tcp_bpf callbacks to the original ones. At the moment, it comes down to the the unhash operation. We had a regression recently because tcp_bpf callbacks were not cleared in this particular scenario of removing a kTLS socket from a sockmap. It got fixed in commit 4da6a196f93b ("bpf: Sockmap/tls, during free we may call tcp_bpf_unhash() in loop"). Add a test that triggers the regression so that we don't reintroduce it in the future. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-4-jakub@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 123 +++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c new file mode 100644 index 000000000000..589b50c91b96 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +/* + * Tests for sockmap/sockhash holding kTLS sockets. + */ + +#include "test_progs.h" + +#define MAX_TEST_NAME 80 + +static int tcp_server(int family) +{ + int err, s; + + s = socket(family, SOCK_STREAM, 0); + if (CHECK_FAIL(s == -1)) { + perror("socket"); + return -1; + } + + err = listen(s, SOMAXCONN); + if (CHECK_FAIL(err)) { + perror("listen"); + return -1; + } + + return s; +} + +static int disconnect(int fd) +{ + struct sockaddr unspec = { AF_UNSPEC }; + + return connect(fd, &unspec, sizeof(unspec)); +} + +/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */ +static void test_sockmap_ktls_disconnect_after_delete(int family, int map) +{ + struct sockaddr_storage addr = {0}; + socklen_t len = sizeof(addr); + int err, cli, srv, zero = 0; + + srv = tcp_server(family); + if (srv == -1) + return; + + err = getsockname(srv, (struct sockaddr *)&addr, &len); + if (CHECK_FAIL(err)) { + perror("getsockopt"); + goto close_srv; + } + + cli = socket(family, SOCK_STREAM, 0); + if (CHECK_FAIL(cli == -1)) { + perror("socket"); + goto close_srv; + } + + err = connect(cli, (struct sockaddr *)&addr, len); + if (CHECK_FAIL(err)) { + perror("connect"); + goto close_cli; + } + + err = bpf_map_update_elem(map, &zero, &cli, 0); + if (CHECK_FAIL(err)) { + perror("bpf_map_update_elem"); + goto close_cli; + } + + err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (CHECK_FAIL(err)) { + perror("setsockopt(TCP_ULP)"); + goto close_cli; + } + + err = bpf_map_delete_elem(map, &zero); + if (CHECK_FAIL(err)) { + perror("bpf_map_delete_elem"); + goto close_cli; + } + + err = disconnect(cli); + if (CHECK_FAIL(err)) + perror("disconnect"); + +close_cli: + close(cli); +close_srv: + close(srv); +} + +static void run_tests(int family, enum bpf_map_type map_type) +{ + char test_name[MAX_TEST_NAME]; + int map; + + map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + if (CHECK_FAIL(map == -1)) { + perror("bpf_map_create"); + return; + } + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls disconnect_after_delete %s %s", + family == AF_INET ? "IPv4" : "IPv6", + map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); + if (!test__start_subtest(test_name)) + return; + + test_sockmap_ktls_disconnect_after_delete(family, map); + + close(map); +} + +void test_sockmap_ktls(void) +{ + run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP); + run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH); + run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP); + run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH); +} -- cgit v1.2.3 From 67306f84ca78c2ca5136f21791710c126a55a19b Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 17 Feb 2020 19:04:32 -0800 Subject: selftests/bpf: Add bpf_read_branch_records() selftest Add a selftest to test: * default bpf_read_branch_records() behavior * BPF_F_GET_BRANCH_RECORDS_SIZE flag behavior * error path on non branch record perf events * using helper to write to stack * using helper to write to global On host with hardware counter support: # ./test_progs -t perf_branches #27/1 perf_branches_hw:OK #27/2 perf_branches_no_hw:OK #27 perf_branches:OK Summary: 1/2 PASSED, 0 SKIPPED, 0 FAILED On host without hardware counter support (VM): # ./test_progs -t perf_branches #27/1 perf_branches_hw:OK #27/2 perf_branches_no_hw:OK #27 perf_branches:OK Summary: 1/2 PASSED, 1 SKIPPED, 0 FAILED Also sync tools/include/uapi/linux/bpf.h. Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200218030432.4600-3-dxu@dxuuu.xyz --- tools/include/uapi/linux/bpf.h | 25 ++- .../selftests/bpf/prog_tests/perf_branches.c | 170 +++++++++++++++++++++ .../selftests/bpf/progs/test_perf_branches.c | 50 ++++++ 3 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_branches.c create mode 100644 tools/testing/selftests/bpf/progs/test_perf_branches.c (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f1d74a2bd234..a7e59756853f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2892,6 +2892,25 @@ union bpf_attr { * Obtain the 64bit jiffies * Return * The 64 bit jiffies + * + * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * Description + * For an eBPF program attached to a perf event, retrieve the + * branch records (struct perf_branch_entry) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size + * *size* bytes. + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to + * instead return the number of bytes required to store all the + * branch entries. If this flag is set, *buf* may be NULL. + * + * **-EINVAL** if arguments invalid or **size** not a multiple + * of sizeof(struct perf_branch_entry). + * + * **-ENOENT** if architecture does not support branch records. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3012,7 +3031,8 @@ union bpf_attr { FN(probe_read_kernel_str), \ FN(tcp_send_ack), \ FN(send_signal_thread), \ - FN(jiffies64), + FN(jiffies64), \ + FN(read_branch_records), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3091,6 +3111,9 @@ enum bpf_func_id { /* BPF_FUNC_sk_storage_get flags */ #define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +/* BPF_FUNC_read_branch_records flags. */ +#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c new file mode 100644 index 000000000000..e35c444902a7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include "bpf/libbpf_internal.h" +#include "test_perf_branches.skel.h" + +static void check_good_sample(struct test_perf_branches *skel) +{ + int written_global = skel->bss->written_global_out; + int required_size = skel->bss->required_size_out; + int written_stack = skel->bss->written_stack_out; + int pbe_size = sizeof(struct perf_branch_entry); + int duration = 0; + + if (CHECK(!skel->bss->valid, "output not valid", + "no valid sample from prog")) + return; + + /* + * It's hard to validate the contents of the branch entries b/c it + * would require some kind of disassembler and also encoding the + * valid jump instructions for supported architectures. So just check + * the easy stuff for now. + */ + CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size); + CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack); + CHECK(written_stack % pbe_size != 0, "read_branches_stack", + "stack bytes written=%d not multiple of struct size=%d\n", + written_stack, pbe_size); + CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global); + CHECK(written_global % pbe_size != 0, "read_branches_global", + "global bytes written=%d not multiple of struct size=%d\n", + written_global, pbe_size); + CHECK(written_global < written_stack, "read_branches_size", + "written_global=%d < written_stack=%d\n", written_global, written_stack); +} + +static void check_bad_sample(struct test_perf_branches *skel) +{ + int written_global = skel->bss->written_global_out; + int required_size = skel->bss->required_size_out; + int written_stack = skel->bss->written_stack_out; + int duration = 0; + + if (CHECK(!skel->bss->valid, "output not valid", + "no valid sample from prog")) + return; + + CHECK((required_size != -EINVAL && required_size != -ENOENT), + "read_branches_size", "err %d\n", required_size); + CHECK((written_stack != -EINVAL && written_stack != -ENOENT), + "read_branches_stack", "written %d\n", written_stack); + CHECK((written_global != -EINVAL && written_global != -ENOENT), + "read_branches_global", "written %d\n", written_global); +} + +static void test_perf_branches_common(int perf_fd, + void (*cb)(struct test_perf_branches *)) +{ + struct test_perf_branches *skel; + int err, i, duration = 0; + bool detached = false; + struct bpf_link *link; + volatile int j = 0; + cpu_set_t cpu_set; + + skel = test_perf_branches__open_and_load(); + if (CHECK(!skel, "test_perf_branches_load", + "perf_branches skeleton failed\n")) + return; + + /* attach perf_event */ + link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd); + if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link))) + goto out_destroy_skel; + + /* generate some branches on cpu 0 */ + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) + goto out_destroy; + /* spin the loop for a while (random high number) */ + for (i = 0; i < 1000000; ++i) + ++j; + + test_perf_branches__detach(skel); + detached = true; + + cb(skel); +out_destroy: + bpf_link__destroy(link); +out_destroy_skel: + if (!detached) + test_perf_branches__detach(skel); + test_perf_branches__destroy(skel); +} + +static void test_perf_branches_hw(void) +{ + struct perf_event_attr attr = {0}; + int duration = 0; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.freq = 1; + attr.sample_freq = 4000; + attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + + /* + * Some setups don't support branch records (virtual machines, !x86), + * so skip test in this case. + */ + if (pfd == -1) { + if (errno == ENOENT || errno == EOPNOTSUPP) { + printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", + __func__); + test__skip(); + return; + } + if (CHECK(pfd < 0, "perf_event_open", "err %d errno %d\n", + pfd, errno)) + return; + } + + test_perf_branches_common(pfd, check_good_sample); + + close(pfd); +} + +/* + * Tests negative case -- run bpf_read_branch_records() on improperly configured + * perf event. + */ +static void test_perf_branches_no_hw(void) +{ + struct perf_event_attr attr = {0}; + int duration = 0; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; + attr.freq = 1; + attr.sample_freq = 4000; + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd)) + return; + + test_perf_branches_common(pfd, check_bad_sample); + + close(pfd); +} + +void test_perf_branches(void) +{ + if (test__start_subtest("perf_branches_hw")) + test_perf_branches_hw(); + if (test__start_subtest("perf_branches_no_hw")) + test_perf_branches_no_hw(); +} diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c new file mode 100644 index 000000000000..0f7e27d97567 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include +#include +#include +#include +#include "bpf_trace_helpers.h" + +int valid = 0; +int required_size_out = 0; +int written_stack_out = 0; +int written_global_out = 0; + +struct { + __u64 _a; + __u64 _b; + __u64 _c; +} fpbe[30] = {0}; + +SEC("perf_event") +int perf_branches(void *ctx) +{ + __u64 entries[4 * 3] = {0}; + int required_size, written_stack, written_global; + + /* write to stack */ + written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0); + /* ignore spurious events */ + if (!written_stack) + return 1; + + /* get required size */ + required_size = bpf_read_branch_records(ctx, NULL, 0, + BPF_F_GET_BRANCH_RECORDS_SIZE); + + written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0); + /* ignore spurious events */ + if (!written_global) + return 1; + + required_size_out = required_size; + written_stack_out = written_stack; + written_global_out = written_global; + valid = 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 83250f2b6940654a73a2cfab7ac112b804a5f648 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Feb 2020 16:42:36 -0800 Subject: selftests/bpf: Change llvm flag -mcpu=probe to -mcpu=v3 The latest llvm supports cpu version v3, which is cpu version v1 plus some additional 64bit jmp insns and 32bit jmp insn support. In selftests/bpf Makefile, the llvm flag -mcpu=probe did runtime probe into the host system. Depending on compilation environments, it is possible that runtime probe may fail, e.g., due to memlock issue. This will cause generated code with cpu version v1. This may cause confusion as the same compiler and the same C code generates different byte codes in different environment. Let us change the llvm flag -mcpu=probe to -mcpu=v3 so the generated code will be the same regardless of the compilation environment. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200219004236.2291125-1-yhs@fb.com --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 257a1aaaa37d..2a583196fa51 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -209,7 +209,7 @@ define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 + $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE @@ -223,7 +223,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 + $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE -- cgit v1.2.3 From 500897804a369358f4d27356787dcf9b33527fd7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 19 Feb 2020 12:55:14 -0800 Subject: selftests/bpf: Fix build of sockmap_ktls.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The selftests fails to build with: tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c: In function ‘test_sockmap_ktls_disconnect_after_delete’: tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c:72:37: error: ‘TCP_ULP’ undeclared (first use in this function) 72 | err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); | ^~~~~~~ Similar to commit that fixes build of sockmap_basic.c on systems with old /usr/include fix the build of sockmap_ktls.c Fixes: d1ba1204f2ee ("selftests/bpf: Test unhashing kTLS socket after removing from map") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200219205514.3353788-1-ast@kernel.org --- tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 589b50c91b96..06b86addc181 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -7,6 +7,7 @@ #include "test_progs.h" #define MAX_TEST_NAME 80 +#define TCP_ULP 31 static int tcp_server(int family) { -- cgit v1.2.3 From 2bb07f4e1d861fbb6c05f3dd79bdc8b41dfd2c08 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 19 Feb 2020 16:37:56 -0500 Subject: tc-testing: updated tdc tests for basic filter Added tests for 'u32' extended match rules for u8 alignment. Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/basic.json | 242 +++++++++++++++++++++ 1 file changed, 242 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 98a20faf3198..75f547a5ee48 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -372,5 +372,247 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "bae4", + "name": "Add basic filter with u32 ematch u8/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 0x0f at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(01000000/0f000000 at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e6cb", + "name": "Add basic filter with u32 ematch u8/zero offset and invalid value >0xFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/0f000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7727", + "name": "Add basic filter with u32 ematch u8/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0x1f at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17000000/1f000000 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "a429", + "name": "Add basic filter with u32 ematch u8/invalid mask >0xFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff00 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000/ff000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "8373", + "name": "Add basic filter with u32 ematch u8/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "ab8e", + "name": "Add basic filter with u32 ematch u8/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "712d", + "name": "Add basic filter with u32 ematch u8/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "350f", + "name": "Add basic filter with u32 ematch u8/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 zero 0xff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e28f", + "name": "Add basic filter with u32 ematch u8/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11000000/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6d5f", + "name": "Add basic filter with u32 ematch u8/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at -14)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(0000a000/0000f000 at -16\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "12dc", + "name": "Add basic filter with u32 ematch u8/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] -- cgit v1.2.3 From 5327644614a18f5d0ff845844a4e9976210b3d8d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Feb 2020 22:26:35 -0800 Subject: libbpf: Relax check whether BTF is mandatory If BPF program is using BTF-defined maps, BTF is required only for libbpf itself to process map definitions. If after that BTF fails to be loaded into kernel (e.g., if it doesn't support BTF at all), this shouldn't prevent valid BPF program from loading. Existing retry-without-BTF logic for creating maps will succeed to create such maps without any problems. So, presence of .maps section shouldn't make BTF required for kernel. Update the check accordingly. Validated by ensuring simple BPF program with BTF-defined maps is still loaded on old kernel without BTF support and map is correctly parsed and created. Fixes: abd29c931459 ("libbpf: allow specifying map definitions using BTF") Reported-by: Julia Kartseva Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200220062635.1497872-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 514b1a524abb..0eb10b681413 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2280,9 +2280,7 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { - return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || - obj->nr_extern > 0; + return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, -- cgit v1.2.3 From 7aabb6f839622bc96a425d93f3f7373167be1e19 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 23 Jan 2020 12:32:31 -0800 Subject: locktorture: Allow CPU-hotplug to be disabled via --bootargs The bootparam_hotplug_cpu() bash function was checking for CPU-hotplug kernel-boot parameters from --bootargs, but that check was specific to rcutorture ("rcutorture\.onoff_"). This commit therefore makes this check also work for locktorture ("torture\.onoff_"). Note that rcuperf does not do CPU-hotplug operations, so it is not necessary to make a similar change for rcuperf. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index c3a49fb4d6f6..12810229fddc 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -12,7 +12,7 @@ # Returns 1 if the specified boot-parameter string tells rcutorture to # test CPU-hotplug operations. bootparam_hotplug_cpu () { - echo "$1" | grep -q "rcutorture\.onoff_" + echo "$1" | grep -q "torture\.onoff_" } # checkarg --argname argtype $# arg mustmatch cannotmatch -- cgit v1.2.3 From 90e23b6b81a9b374d2940cb0b33935d53664509e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Dec 2019 13:24:07 -0800 Subject: torture: Make results-directory date format completion-friendly The names of the per-test results directories are of the form 2019.11.29-20:42:19. This works, but the ":" characters make tab-based shell name completion a bit onerous because the user must remember to include a quote character somewhere before the first ":". This commit therefore changes the ":" characters to periods, as in 2019.12.01-20.48.01", which allows tab-based completion to work more naturally. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 78d18ab8e954..2315e2ec12d6 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -39,7 +39,7 @@ TORTURE_TRUST_MAKE="" resdir="" configs="" cpus=0 -ds=`date +%Y.%m.%d-%H:%M:%S` +ds=`date +%Y.%m.%d-%H.%M.%S` jitter="-1" usage () { -- cgit v1.2.3 From e0714247373b9c0253b002573f63f3e9698b7b30 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 15 Dec 2019 12:11:56 -0800 Subject: rcutorture: Add 100-CPU configuration The small-system rcutorture configurations have served us well for a great many years, but it is now time to add a larger one. This commit does just that, but does not add it to the defaults in CFLIST. This allows the kvm.sh argument '--configs "4*CFLIST TREE10" to run four instances of each of the default configurations concurrently with one instance of the large configuration. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/TREE10 | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TREE10 (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 new file mode 100644 index 000000000000..2debe7891aeb --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -0,0 +1,18 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=100 +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +#CHECK#CONFIG_PROVE_RCU=n +CONFIG_DEBUG_OBJECTS=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=n -- cgit v1.2.3 From c0b94ffb66845d13f9ec537a28bd1466b4de2e14 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2019 12:04:33 -0800 Subject: rcutorture: Summarize summary of build and run results When running the default list of tests, the run summary of a successful (that is, failed to find any errors) run fits easily on a 24-line screen. But a run with something like "--configs '5*CFLIST'" will be 80 lines long, and it is all too easy to miss a failure message when scrolling back. This commit therefore prints out the number of runs with failing builds or runtime failures, but only if there are any such failures. For example, a run with a single build error and a single runtime error would print two lines like this: 1 runs with build errors. 1 runs with runtime errors. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index e5edd5198725..0326f4a5ff9c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -13,6 +13,9 @@ # # Authors: Paul E. McKenney +T=/tmp/kvm-recheck.sh.$$ +trap 'rm -f $T' 0 2 + PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh for rd in "$@" @@ -68,4 +71,16 @@ do fi done done -EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1 +EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1 +ret=$? +builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`" +if test "$builderrors" -gt 0 +then + echo $builderrors runs with build errors. +fi +runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`" +if test "$runerrors" -gt 0 +then + echo $runerrors runs with runtime errors. +fi +exit $ret -- cgit v1.2.3 From beabc806f5aaa158fc90a939215e8b44ee9d7acc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2019 12:08:31 -0800 Subject: rcutorture: Make kvm-find-errors.sh abort on bad directory Currently, kvm-find-errors.sh gives a usage prompt when given a bad directory, but then soldiers on, giving a series of confusing error messages. This commit therefore prints an error message and exits when given a bad directory, hopefully reducing confusion. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 1871d00bccd7..6f50722f251f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -20,7 +20,9 @@ rundir="${1}" if test -z "$rundir" -o ! -d "$rundir" then + echo Directory "$rundir" not found. echo Usage: $0 directory + exit 1 fi editor=${EDITOR-vi} -- cgit v1.2.3 From a144935ceaed277c3e640b85f4cff89d7cce4b8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2020 05:20:18 -0800 Subject: rcutorture: Set KCSAN Kconfig options to detect more data races This commit enables the KCSAN Kconfig options that (1) detect data races between reads and writes even when the writes do not change the variable's value and (2) detect data races involving plain C-language writes. These changes only affect scripted rcutorture runs and can be overridden using the kvm.sh --kconfig argument. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/CFcommon | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index e19a444a0684..0e92d85313aa 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -3,3 +3,5 @@ CONFIG_PRINTK_TIME=y CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y CONFIG_KVM_GUEST=y +CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n +CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n -- cgit v1.2.3 From dd88aed92d017bed2a2c0023f22f0eef7cd29702 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Thu, 20 Feb 2020 13:26:24 +0000 Subject: libbpf: Bump libpf current version to v0.0.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New development cycles starts, bump to v0.0.8. Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/158220518424.127661.8278643006567775528.stgit@xdp-tutorial --- tools/lib/bpf/libbpf.map | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b035122142bb..45be19c9d752 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -235,3 +235,6 @@ LIBBPF_0.0.7 { btf__align_of; libbpf_find_kernel_btf; } LIBBPF_0.0.6; + +LIBBPF_0.0.8 { +} LIBBPF_0.0.7; -- cgit v1.2.3 From ff26ce5cd7680ebc9c5446cda013e2087decc60f Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Thu, 20 Feb 2020 13:26:35 +0000 Subject: libbpf: Add support for dynamic program attach target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently when you want to attach a trace program to a bpf program the section name needs to match the tracepoint/function semantics. However the addition of the bpf_program__set_attach_target() API allows you to specify the tracepoint/function dynamically. The call flow would look something like this: xdp_fd = bpf_prog_get_fd_by_id(id); trace_obj = bpf_object__open_file("func.o", NULL); prog = bpf_object__find_program_by_title(trace_obj, "fentry/myfunc"); bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY); bpf_program__set_attach_target(prog, xdp_fd, "xdpfilt_blk_all"); bpf_object__load(trace_obj) Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/158220519486.127661.7964708960649051384.stgit@xdp-tutorial --- tools/lib/bpf/libbpf.c | 34 ++++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf.h | 4 ++++ tools/lib/bpf/libbpf.map | 2 ++ 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0eb10b681413..8f0c436d5880 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4937,8 +4937,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i, btf_id; - if (prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_EXT) { + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { btf_id = libbpf_find_attach_btf_id(prog); if (btf_id <= 0) return btf_id; @@ -6581,6 +6581,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, else err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + return err; } @@ -6653,8 +6656,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog) err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, name + section_defs[i].len, attach_type); - if (err <= 0) - pr_warn("%s is not found in vmlinux BTF\n", name); return err; } pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); @@ -8130,6 +8131,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } +int bpf_program__set_attach_target(struct bpf_program *prog, + int attach_prog_fd, + const char *attach_func_name) +{ + int btf_id; + + if (!prog || attach_prog_fd < 0 || !attach_func_name) + return -EINVAL; + + if (attach_prog_fd) + btf_id = libbpf_find_prog_btf_id(attach_func_name, + attach_prog_fd); + else + btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, + attach_func_name, + prog->expected_attach_type); + + if (btf_id < 0) + return btf_id; + + prog->attach_btf_id = btf_id; + prog->attach_prog_fd = attach_prog_fd; + return 0; +} + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { int err = 0, n, len, start, end = -1; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3fe12c9d1f92..02fc58a21a7f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -334,6 +334,10 @@ LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); +LIBBPF_API int +bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, + const char *attach_func_name); + LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 45be19c9d752..7b014c8cdece 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -237,4 +237,6 @@ LIBBPF_0.0.7 { } LIBBPF_0.0.6; LIBBPF_0.0.8 { + global: + bpf_program__set_attach_target; } LIBBPF_0.0.7; -- cgit v1.2.3 From 933ce62d68dee5465cb5f5b359ab0638c9f665b1 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Thu, 20 Feb 2020 13:26:45 +0000 Subject: selftests/bpf: Update xdp_bpf2bpf test to use new set_attach_target API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new bpf_program__set_attach_target() API in the xdp_bpf2bpf selftest so it can be referenced as an example on how to use it. Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/158220520562.127661.14289388017034825841.stgit@xdp-tutorial --- tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 16 +++++++++++++--- tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 4 ++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 6b56bdc73ebc..4ba011031d4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -14,7 +14,7 @@ void test_xdp_bpf2bpf(void) struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -27,11 +27,21 @@ void test_xdp_bpf2bpf(void) bpf_map_update_elem(map_fd, &key4, &value4, 0); /* Load trace program */ - opts.attach_prog_fd = pkt_fd, - ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts); + ftrace_skel = test_xdp_bpf2bpf__open(); if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) goto out; + /* Demonstrate the bpf_program__set_attach_target() API rather than + * the load with options, i.e. opts.attach_prog_fd. + */ + prog = ftrace_skel->progs.trace_on_entry; + bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY); + bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); + + prog = ftrace_skel->progs.trace_on_exit; + bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT); + bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); + err = test_xdp_bpf2bpf__load(ftrace_skel); if (CHECK(err, "__load", "ftrace skeleton failed\n")) goto out; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index cb8a04ab7a78..b840fc9e3ed5 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -28,7 +28,7 @@ struct xdp_buff { } __attribute__((preserve_access_index)); __u64 test_result_fentry = 0; -SEC("fentry/_xdp_tx_iptunnel") +SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { test_result_fentry = xdp->rxq->dev->ifindex; @@ -36,7 +36,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) } __u64 test_result_fexit = 0; -SEC("fexit/_xdp_tx_iptunnel") +SEC("fexit/FUNC") int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret) { test_result_fexit = ret; -- cgit v1.2.3 From 006ed53e8caa0b3c1e21a51dc34d6b3b5ab0aab6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Feb 2020 15:05:46 -0800 Subject: selftests/bpf: Fix trampoline_count clean up logic Libbpf's Travis CI tests caught this issue. Ensure bpf_link and bpf_object clean up is performed correctly. Fixes: d633d57902a5 ("selftest/bpf: Add test for allowed trampolines count") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/20200220230546.769250-1-andriin@fb.com --- .../selftests/bpf/prog_tests/trampoline_count.c | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 1f6ccdaed1ac..781c8d11604b 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -55,31 +55,40 @@ void test_trampoline_count(void) /* attach 'allowed' 40 trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + obj = NULL; goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; inst[i].obj = obj; + obj = NULL; if (rand() % 2) { - link = load(obj, fentry_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + link = load(inst[i].obj, fentry_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + link = NULL; goto cleanup; + } inst[i].link_fentry = link; } else { - link = load(obj, fexit_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + link = load(inst[i].obj, fexit_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + link = NULL; goto cleanup; + } inst[i].link_fexit = link; } } /* and try 1 extra.. */ obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + obj = NULL; goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -104,7 +113,9 @@ void test_trampoline_count(void) cleanup_extra: bpf_object__close(obj); cleanup: - while (--i) { + if (i >= MAX_TRAMP_PROGS) + i = MAX_TRAMP_PROGS - 1; + for (; i >= 0; i--) { bpf_link__destroy(inst[i].link_fentry); bpf_link__destroy(inst[i].link_fexit); bpf_object__close(inst[i].obj); -- cgit v1.2.3 From 113d4bc9048336ba7c3d2ad972dbad4aef6e148a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 17 Feb 2020 21:41:53 -0600 Subject: objtool: Fix clang switch table edge case Clang has the ability to create a switch table which is not a jump table, but is rather a table of string pointers. This confuses objtool, because it sees the relocations for the string pointers and assumes they're part of a jump table: drivers/ata/sata_dwc_460ex.o: warning: objtool: sata_dwc_bmdma_start_by_tag()+0x3a2: can't find switch jump table net/ceph/messenger.o: warning: objtool: ceph_con_workfn()+0x47c: can't find switch jump table Make objtool's find_jump_table() smart enough to distinguish between a switch jump table (which has relocations to text addresses in the same function as the original instruction) and other anonymous rodata (which may have relocations to elsewhere). Reported-by: Nick Desaulniers Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Tested-by: Nick Desaulniers Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/485 Link: https://lkml.kernel.org/r/263f6aae46d33da0b86d7030ced878cb5cab1788.1581997059.git.jpoimboe@redhat.com --- tools/objtool/check.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b038de2ccd71..4d6e283951ec 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1025,7 +1025,7 @@ static struct rela *find_jump_table(struct objtool_file *file, struct instruction *insn) { struct rela *text_rela, *table_rela; - struct instruction *orig_insn = insn; + struct instruction *dest_insn, *orig_insn = insn; struct section *table_sec; unsigned long table_offset; @@ -1077,10 +1077,17 @@ static struct rela *find_jump_table(struct objtool_file *file, strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) continue; - /* Each table entry has a rela associated with it. */ + /* + * Each table entry has a rela associated with it. The rela + * should reference text in the same function as the original + * instruction. + */ table_rela = find_rela_by_dest(table_sec, table_offset); if (!table_rela) continue; + dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend); + if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) + continue; /* * Use of RIP-relative switch jumps is quite rare, and -- cgit v1.2.3 From 7acfe5315312fc56c2a94c9216448087b38ae909 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 17 Feb 2020 21:41:54 -0600 Subject: objtool: Improve call destination function detection A recent clang change, combined with a binutils bug, can trigger a situation where a ".Lprintk$local" STT_NOTYPE symbol gets created at the same offset as the "printk" STT_FUNC symbol. This confuses objtool: kernel/printk/printk.o: warning: objtool: ignore_loglevel_setup()+0x10: can't find call dest symbol at .text+0xc67 Improve the call destination detection by looking specifically for an STT_FUNC symbol. Reported-by: Nick Desaulniers Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Tested-by: Nick Desaulniers Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/872 Link: https://sourceware.org/bugzilla/show_bug.cgi?id=25551 Link: https://lkml.kernel.org/r/0a7ee320bc0ea4469bd3dc450a7b4725669e0ea9.1581997059.git.jpoimboe@redhat.com --- tools/objtool/check.c | 27 ++++++++++++++++++--------- tools/objtool/elf.c | 14 ++++++++++++-- tools/objtool/elf.h | 1 + 3 files changed, 31 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4d6e283951ec..6b6178ef4464 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -420,8 +420,8 @@ static void add_ignores(struct objtool_file *file) break; case STT_SECTION: - func = find_symbol_by_offset(rela->sym->sec, rela->addend); - if (!func || func->type != STT_FUNC) + func = find_func_by_offset(rela->sym->sec, rela->addend); + if (!func) continue; break; @@ -665,10 +665,14 @@ static int add_call_destinations(struct objtool_file *file) insn->len); if (!rela) { dest_off = insn->offset + insn->len + insn->immediate; - insn->call_dest = find_symbol_by_offset(insn->sec, - dest_off); + insn->call_dest = find_func_by_offset(insn->sec, dest_off); + if (!insn->call_dest) + insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); - if (!insn->call_dest && !insn->ignore) { + if (insn->ignore) + continue; + + if (!insn->call_dest) { WARN_FUNC("unsupported intra-function call", insn->sec, insn->offset); if (retpoline) @@ -676,11 +680,16 @@ static int add_call_destinations(struct objtool_file *file) return -1; } + if (insn->func && insn->call_dest->type != STT_FUNC) { + WARN_FUNC("unsupported call to non-function", + insn->sec, insn->offset); + return -1; + } + } else if (rela->sym->type == STT_SECTION) { - insn->call_dest = find_symbol_by_offset(rela->sym->sec, - rela->addend+4); - if (!insn->call_dest || - insn->call_dest->type != STT_FUNC) { + insn->call_dest = find_func_by_offset(rela->sym->sec, + rela->addend+4); + if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at %s+0x%x", insn->sec, insn->offset, rela->sym->sec->name, diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index edba4745f25a..cc4601c879ce 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -62,8 +62,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) struct symbol *sym; list_for_each_entry(sym, &sec->symbol_list, list) - if (sym->type != STT_SECTION && - sym->offset == offset) + if (sym->type != STT_SECTION && sym->offset == offset) + return sym; + + return NULL; +} + +struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type == STT_FUNC && sym->offset == offset) return sym; return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 44150204db4d..a1963259b930 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -77,6 +77,7 @@ struct elf { struct elf *elf_read(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); +struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); -- cgit v1.2.3 From 5c4b513e59505e9a75198bfdcf52f26f3fb472ae Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 21 Feb 2020 09:38:57 -0500 Subject: tc-testing: updated tdc tests for basic filter with u16 extended match rules Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/basic.json | 242 +++++++++++++++++++++ 1 file changed, 242 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 75f547a5ee48..222174a2de01 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -614,5 +614,247 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "1d85", + "name": "Add basic filter with u32 ematch u16/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x1122 0xffff at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/ffff0000 at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "3672", + "name": "Add basic filter with u32 ematch u16/zero offset and invalid value >0xFFFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x112233 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223300/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7fb0", + "name": "Add basic filter with u32 ematch u16/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0x1fff at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17880000/1fff0000 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "19af", + "name": "Add basic filter with u32 ematch u16/invalid mask >0xFFFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffffffff at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffffffff at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "446d", + "name": "Add basic filter with u32 ematch u16/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "151b", + "name": "Add basic filter with u32 ematch u16/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bb23", + "name": "Add basic filter with u32 ematch u16/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "decc", + "name": "Add basic filter with u32 ematch u16/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 zero 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e988", + "name": "Add basic filter with u32 ematch u16/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "07d8", + "name": "Add basic filter with u32 ematch u16/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xffff at -12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabb0000/ffff0000 at -12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f474", + "name": "Add basic filter with u32 ematch u16/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] -- cgit v1.2.3 From 8ca30379a40103bf6734ae127ec940da798534dd Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:16 +0000 Subject: bpf, sockmap: Allow inserting listening TCP sockets into sockmap In order for sockmap/sockhash types to become generic collections for storing TCP sockets we need to loosen the checks during map update, while tightening the checks in redirect helpers. Currently sock{map,hash} require the TCP socket to be in established state, which prevents inserting listening sockets. Change the update pre-checks so the socket can also be in listening state. Since it doesn't make sense to redirect with sock{map,hash} to listening sockets, add appropriate socket state checks to BPF redirect helpers too. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-5-jakub@cloudflare.com --- net/core/sock_map.c | 59 ++++++++++++++++++++++++--------- tools/testing/selftests/bpf/test_maps.c | 6 +--- 2 files changed, 45 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3a7a96ab088a..dd92a3556d73 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -391,7 +391,8 @@ out_free: static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops) { return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB || - ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB; + ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB || + ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } static bool sock_map_sk_is_suitable(const struct sock *sk) @@ -400,6 +401,16 @@ static bool sock_map_sk_is_suitable(const struct sock *sk) sk->sk_protocol == IPPROTO_TCP; } +static bool sock_map_sk_state_allowed(const struct sock *sk) +{ + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); +} + +static bool sock_map_redirect_allowed(const struct sock *sk) +{ + return sk->sk_state != TCP_LISTEN; +} + static int sock_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -423,7 +434,7 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, } sock_map_sk_acquire(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (!sock_map_sk_state_allowed(sk)) ret = -EOPNOTSUPP; else ret = sock_map_update_common(map, idx, sk, flags); @@ -460,13 +471,17 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, struct bpf_map *, map, u32, key, u64, flags) { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct sock *sk; if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - tcb->bpf.flags = flags; - tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key); - if (!tcb->bpf.sk_redir) + + sk = __sock_map_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + tcb->bpf.flags = flags; + tcb->bpf.sk_redir = sk; return SK_PASS; } @@ -483,12 +498,17 @@ const struct bpf_func_proto bpf_sk_redirect_map_proto = { BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg, struct bpf_map *, map, u32, key, u64, flags) { + struct sock *sk; + if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - msg->flags = flags; - msg->sk_redir = __sock_map_lookup_elem(map, key); - if (!msg->sk_redir) + + sk = __sock_map_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + msg->flags = flags; + msg->sk_redir = sk; return SK_PASS; } @@ -748,7 +768,7 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key, } sock_map_sk_acquire(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (!sock_map_sk_state_allowed(sk)) ret = -EOPNOTSUPP; else ret = sock_hash_update_common(map, key, sk, flags); @@ -916,13 +936,17 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb, struct bpf_map *, map, void *, key, u64, flags) { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct sock *sk; if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - tcb->bpf.flags = flags; - tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key); - if (!tcb->bpf.sk_redir) + + sk = __sock_hash_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + tcb->bpf.flags = flags; + tcb->bpf.sk_redir = sk; return SK_PASS; } @@ -939,12 +963,17 @@ const struct bpf_func_proto bpf_sk_redirect_hash_proto = { BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg, struct bpf_map *, map, void *, key, u64, flags) { + struct sock *sk; + if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - msg->flags = flags; - msg->sk_redir = __sock_hash_lookup_elem(map, key); - if (!msg->sk_redir) + + sk = __sock_hash_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + msg->flags = flags; + msg->sk_redir = sk; return SK_PASS; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 02eae1e864c2..c6766b2cff85 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data) /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (i < 2 && !err) { - printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n", - i, sfd[i]); - goto out_sockmap; - } else if (i >= 2 && err) { + if (err) { printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; -- cgit v1.2.3 From 11318ba8cafd59105637b2b82b8a32719e44a2d2 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:22 +0000 Subject: selftests/bpf: Extend SK_REUSEPORT tests to cover SOCKMAP/SOCKHASH Parametrize the SK_REUSEPORT tests so that the map type for storing sockets is not hard-coded in the test setup routine. This, together with careful state cleaning after the tests, lets us run the test cases for REUSEPORT_ARRAY, SOCKMAP, and SOCKHASH to have test coverage for all supported map types. The last two support only TCP sockets at the moment. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-11-jakub@cloudflare.com --- .../selftests/bpf/prog_tests/select_reuseport.c | 63 ++++++++++++++++++---- 1 file changed, 53 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 098bcae5f827..9ed0ab06fd92 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map; static __u32 expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; static int reuseport_array = -1, outer_map = -1; +static enum bpf_map_type inner_map_type; static int select_by_skb_data_prog; static int saved_tcp_syncookie = -1; static struct bpf_object *obj; @@ -63,13 +64,15 @@ static union sa46 { } \ }) -static int create_maps(void) +static int create_maps(enum bpf_map_type inner_type) { struct bpf_create_map_attr attr = {}; + inner_map_type = inner_type; + /* Creating reuseport_array */ attr.name = "reuseport_array"; - attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; + attr.map_type = inner_type; attr.key_size = sizeof(__u32); attr.value_size = sizeof(__u32); attr.max_entries = REUSEPORT_ARRAY_SIZE; @@ -726,12 +729,36 @@ static void cleanup_per_test(bool no_inner_map) static void cleanup(void) { - if (outer_map != -1) + if (outer_map != -1) { close(outer_map); - if (reuseport_array != -1) + outer_map = -1; + } + + if (reuseport_array != -1) { close(reuseport_array); - if (obj) + reuseport_array = -1; + } + + if (obj) { bpf_object__close(obj); + obj = NULL; + } + + memset(expected_results, 0, sizeof(expected_results)); +} + +static const char *maptype_str(enum bpf_map_type type) +{ + switch (type) { + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + return "reuseport_sockarray"; + case BPF_MAP_TYPE_SOCKMAP: + return "sockmap"; + case BPF_MAP_TYPE_SOCKHASH: + return "sockhash"; + default: + return "unknown"; + } } static const char *family_str(sa_family_t family) @@ -779,13 +806,21 @@ static void test_config(int sotype, sa_family_t family, bool inany) const struct test *t; for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s/%s %s %s", + snprintf(s, sizeof(s), "%s %s/%s %s %s", + maptype_str(inner_map_type), family_str(family), sotype_str(sotype), inany ? "INANY" : "LOOPBACK", t->name); if (!test__start_subtest(s)) continue; + if (sotype == SOCK_DGRAM && + inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { + /* SOCKMAP/SOCKHASH don't support UDP yet */ + test__skip(); + continue; + } + setup_per_test(sotype, family, inany, t->no_inner_map); t->fn(sotype, family); cleanup_per_test(t->no_inner_map); @@ -814,13 +849,20 @@ static void test_all(void) test_config(c->sotype, c->family, c->inany); } -void test_select_reuseport(void) +void test_map_type(enum bpf_map_type mt) { - if (create_maps()) + if (create_maps(mt)) goto out; if (prepare_bpf_obj()) goto out; + test_all(); +out: + cleanup(); +} + +void test_select_reuseport(void) +{ saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL); if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0) @@ -831,8 +873,9 @@ void test_select_reuseport(void) if (disable_syncookie()) goto out; - test_all(); + test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + test_map_type(BPF_MAP_TYPE_SOCKMAP); + test_map_type(BPF_MAP_TYPE_SOCKHASH); out: - cleanup(); restore_sysctls(); } -- cgit v1.2.3 From 44d28be2b8d41e3198052b8c9eded2e23eb9e08b Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:23 +0000 Subject: selftests/bpf: Tests for sockmap/sockhash holding listening sockets Now that SOCKMAP and SOCKHASH map types can store listening sockets, user-space and BPF API is open to a new set of potential pitfalls. Exercise the map operations, with extra attention to code paths susceptible to races between map ops and socket cloning, and BPF helpers that work with SOCKMAP/SOCKHASH to gain confidence that all works as expected. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-12-jakub@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 1496 ++++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_listen.c | 98 ++ 2 files changed, 1594 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/sockmap_listen.c create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_listen.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c new file mode 100644 index 000000000000..b1b2acea0638 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -0,0 +1,1496 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +/* + * Test suite for SOCKMAP/SOCKHASH holding listening sockets. + * Covers: + * 1. BPF map operations - bpf_map_{update,lookup delete}_elem + * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map + * 3. BPF reuseport helper - bpf_sk_select_reuseport + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf_util.h" +#include "test_progs.h" +#include "test_sockmap_listen.skel.h" + +#define MAX_STRERR_LEN 256 +#define MAX_TEST_NAME 80 + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + +/* Wrappers that fail the test on error and report it. */ + +#define xaccept(fd, addr, len) \ + ({ \ + int __ret = accept((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +#define xbpf_map_delete_elem(fd, key) \ + ({ \ + int __ret = bpf_map_delete_elem((fd), (key)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_delete"); \ + __ret; \ + }) + +#define xbpf_map_lookup_elem(fd, key, val) \ + ({ \ + int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_lookup"); \ + __ret; \ + }) + +#define xbpf_map_update_elem(fd, key, val, flags) \ + ({ \ + int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_update"); \ + __ret; \ + }) + +#define xbpf_prog_attach(prog, target, type, flags) \ + ({ \ + int __ret = \ + bpf_prog_attach((prog), (target), (type), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("prog_attach(" #type ")"); \ + __ret; \ + }) + +#define xbpf_prog_detach2(prog, target, type) \ + ({ \ + int __ret = bpf_prog_detach2((prog), (target), (type)); \ + if (__ret == -1) \ + FAIL_ERRNO("prog_detach2(" #type ")"); \ + __ret; \ + }) + +#define xpthread_create(thread, attr, func, arg) \ + ({ \ + int __ret = pthread_create((thread), (attr), (func), (arg)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_create"); \ + __ret; \ + }) + +#define xpthread_join(thread, retval) \ + ({ \ + int __ret = pthread_join((thread), (retval)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_join"); \ + __ret; \ + }) + +static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static int listen_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static int listen_loopback(int family, int sotype) +{ + return listen_loopback_reuseport(family, sotype, -1); +} + +static void test_insert_invalid(int family, int sotype, int mapfd) +{ + u32 key = 0; + u64 value; + int err; + + value = -1; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EINVAL) + FAIL_ERRNO("map_update: expected EINVAL"); + + value = INT_MAX; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EBADF) + FAIL_ERRNO("map_update: expected EBADF"); +} + +static void test_insert_opened(int family, int sotype, int mapfd) +{ + u32 key = 0; + u64 value; + int err, s; + + s = xsocket(family, sotype, 0); + if (s == -1) + return; + + errno = 0; + value = s; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); + + xclose(s); +} + +static void test_insert_bound(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + u32 key = 0; + u64 value; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return; + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + errno = 0; + value = s; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); +close: + xclose(s); +} + +static void test_insert_listening(int family, int sotype, int mapfd) +{ + u64 value; + u32 key; + int s; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xclose(s); +} + +static void test_delete_after_insert(int family, int sotype, int mapfd) +{ + u64 value; + u32 key; + int s; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + xclose(s); +} + +static void test_delete_after_close(int family, int sotype, int mapfd) +{ + int err, s; + u64 value; + u32 key; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + xclose(s); + + errno = 0; + err = bpf_map_delete_elem(mapfd, &key); + if (!err || (errno != EINVAL && errno != ENOENT)) + /* SOCKMAP and SOCKHASH return different error codes */ + FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); +} + +static void test_lookup_after_insert(int family, int sotype, int mapfd) +{ + u64 cookie, value; + socklen_t len; + u32 key; + int s; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + len = sizeof(cookie); + xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len); + + xbpf_map_lookup_elem(mapfd, &key, &value); + + if (value != cookie) { + FAIL("map_lookup: have %#llx, want %#llx", + (unsigned long long)value, (unsigned long long)cookie); + } + + xclose(s); +} + +static void test_lookup_after_delete(int family, int sotype, int mapfd) +{ + int err, s; + u64 value; + u32 key; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + + errno = 0; + err = bpf_map_lookup_elem(mapfd, &key, &value); + if (!err || errno != ENOENT) + FAIL_ERRNO("map_lookup: expected ENOENT"); + + xclose(s); +} + +static void test_lookup_32_bit_value(int family, int sotype, int mapfd) +{ + u32 key, value32; + int err, s; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key), + sizeof(value32), 1, 0); + if (mapfd < 0) { + FAIL_ERRNO("map_create"); + goto close; + } + + key = 0; + value32 = s; + xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST); + + errno = 0; + err = bpf_map_lookup_elem(mapfd, &key, &value32); + if (!err || errno != ENOSPC) + FAIL_ERRNO("map_lookup: expected ENOSPC"); + + xclose(mapfd); +close: + xclose(s); +} + +static void test_update_listening(int family, int sotype, int mapfd) +{ + int s1, s2; + u64 value; + u32 key; + + s1 = listen_loopback(family, sotype); + if (s1 < 0) + return; + + s2 = listen_loopback(family, sotype); + if (s2 < 0) + goto close_s1; + + key = 0; + value = s1; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + value = s2; + xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST); + xclose(s2); +close_s1: + xclose(s1); +} + +/* Exercise the code path where we destroy child sockets that never + * got accept()'ed, aka orphans, when parent socket gets closed. + */ +static void test_destroy_orphan_child(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s, c; + u64 value; + u32 key; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + xconnect(c, sockaddr(&addr), len); + xclose(c); +close_srv: + xclose(s); +} + +/* Perform a passive open after removing listening socket from SOCKMAP + * to ensure that callbacks get restored properly. + */ +static void test_clone_after_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s, c; + u64 value; + u32 key; + + s = listen_loopback(family, sotype); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + + xconnect(c, sockaddr(&addr), len); + xclose(c); +close_srv: + xclose(s); +} + +/* Check that child socket that got created while parent was in a + * SOCKMAP, but got accept()'ed only after the parent has been removed + * from SOCKMAP, gets cloned without parent psock state or callbacks. + */ +static void test_accept_after_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + const u32 zero = 0; + int err, s, c, p; + socklen_t len; + u64 value; + + s = listen_loopback(family, sotype); + if (s == -1) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + value = s; + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + /* Create child while parent is in sockmap */ + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + /* Remove parent from sockmap */ + err = xbpf_map_delete_elem(mapfd, &zero); + if (err) + goto close_cli; + + p = xaccept(s, NULL, NULL); + if (p == -1) + goto close_cli; + + /* Check that child sk_user_data is not set */ + value = p; + xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +/* Check that child socket that got created and accepted while parent + * was in a SOCKMAP is cloned without parent psock state or callbacks. + */ +static void test_accept_before_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + const u32 zero = 0, one = 1; + int err, s, c, p; + socklen_t len; + u64 value; + + s = listen_loopback(family, sotype); + if (s == -1) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + value = s; + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + /* Create & accept child while parent is in sockmap */ + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + p = xaccept(s, NULL, NULL); + if (p == -1) + goto close_cli; + + /* Check that child sk_user_data is not set */ + value = p; + xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST); + + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +struct connect_accept_ctx { + int sockfd; + unsigned int done; + unsigned int nr_iter; +}; + +static bool is_thread_done(struct connect_accept_ctx *ctx) +{ + return READ_ONCE(ctx->done); +} + +static void *connect_accept_thread(void *arg) +{ + struct connect_accept_ctx *ctx = arg; + struct sockaddr_storage addr; + int family, socktype; + socklen_t len; + int err, i, s; + + s = ctx->sockfd; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto done; + + len = sizeof(family); + err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len); + if (err) + goto done; + + len = sizeof(socktype); + err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len); + if (err) + goto done; + + for (i = 0; i < ctx->nr_iter; i++) { + int c, p; + + c = xsocket(family, socktype, 0); + if (c < 0) + break; + + err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr)); + if (err) { + xclose(c); + break; + } + + p = xaccept(s, NULL, NULL); + if (p < 0) { + xclose(c); + break; + } + + xclose(p); + xclose(c); + } +done: + WRITE_ONCE(ctx->done, 1); + return NULL; +} + +static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) +{ + struct connect_accept_ctx ctx = { 0 }; + struct sockaddr_storage addr; + socklen_t len; + u32 zero = 0; + pthread_t t; + int err, s; + u64 value; + + s = listen_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close; + + ctx.sockfd = s; + ctx.nr_iter = 1000; + + err = xpthread_create(&t, NULL, connect_accept_thread, &ctx); + if (err) + goto close; + + value = s; + while (!is_thread_done(&ctx)) { + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + break; + + err = xbpf_map_delete_elem(mapfd, &zero); + if (err) + break; + } + + xpthread_join(t, NULL); +close: + xclose(s); +} + +static void *listen_thread(void *arg) +{ + struct sockaddr unspec = { AF_UNSPEC }; + struct connect_accept_ctx *ctx = arg; + int err, i, s; + + s = ctx->sockfd; + + for (i = 0; i < ctx->nr_iter; i++) { + err = xlisten(s, 1); + if (err) + break; + err = xconnect(s, &unspec, sizeof(unspec)); + if (err) + break; + } + + WRITE_ONCE(ctx->done, 1); + return NULL; +} + +static void test_race_insert_listen(int family, int socktype, int mapfd) +{ + struct connect_accept_ctx ctx = { 0 }; + const u32 zero = 0; + const int one = 1; + pthread_t t; + int err, s; + u64 value; + + s = xsocket(family, socktype, 0); + if (s < 0) + return; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + if (err) + goto close; + + ctx.sockfd = s; + ctx.nr_iter = 10000; + + err = pthread_create(&t, NULL, listen_thread, &ctx); + if (err) + goto close; + + value = s; + while (!is_thread_done(&ctx)) { + err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + /* Expecting EOPNOTSUPP before listen() */ + if (err && errno != EOPNOTSUPP) { + FAIL_ERRNO("map_update"); + break; + } + + err = bpf_map_delete_elem(mapfd, &zero); + /* Expecting no entry after unhash on connect(AF_UNSPEC) */ + if (err && errno != EINVAL && errno != ENOENT) { + FAIL_ERRNO("map_delete"); + break; + } + } + + xpthread_join(t, NULL); +close: + xclose(s); +} + +static void zero_verdict_count(int mapfd) +{ + unsigned int zero = 0; + int key; + + key = SK_DROP; + xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); + key = SK_PASS; + xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); +} + +enum redir_mode { + REDIR_INGRESS, + REDIR_EGRESS, +}; + +static const char *redir_mode_str(enum redir_mode mode) +{ + switch (mode) { + case REDIR_INGRESS: + return "ingress"; + case REDIR_EGRESS: + return "egress"; + default: + return "unknown"; + } +} + +static void redir_to_connected(int family, int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + struct sockaddr_storage addr; + int s, c0, c1, p0, p1; + unsigned int pass; + socklen_t len; + int err, n; + u64 value; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + s = listen_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c0 = xsocket(family, sotype, 0); + if (c0 < 0) + goto close_srv; + err = xconnect(c0, sockaddr(&addr), len); + if (err) + goto close_cli0; + + p0 = xaccept(s, NULL, NULL); + if (p0 < 0) + goto close_cli0; + + c1 = xsocket(family, sotype, 0); + if (c1 < 0) + goto close_peer0; + err = xconnect(c1, sockaddr(&addr), len); + if (err) + goto close_cli1; + + p1 = xaccept(s, NULL, NULL); + if (p1 < 0) + goto close_cli1; + + key = 0; + value = p0; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer1; + + key = 1; + value = p1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer1; + + n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close_peer1; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close_peer1; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + + n = read(c0, &b, 1); + if (n < 0) + FAIL_ERRNO("%s: read", log_prefix); + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close_peer1: + xclose(p1); +close_cli1: + xclose(c1); +close_peer0: + xclose(p0); +close_cli0: + xclose(c0); +close_srv: + xclose(s); +} + +static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return; + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + goto detach; + + redir_to_connected(family, sotype, sock_map, verdict_map, + REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); +detach: + xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); +} + +static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); + if (err) + return; + + redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); +} + +static void redir_to_listening(int family, int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + struct sockaddr_storage addr; + int s, c, p, err, n; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_mapfd); + + s = listen_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + p = xaccept(s, NULL, NULL); + if (p < 0) + goto close_cli; + + key = 0; + value = s; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer; + + key = 1; + value = p; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer; + + n = write(mode == REDIR_INGRESS ? c : p, "a", 1); + if (n < 0 && errno != EACCES) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close_peer; + + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop); + if (err) + goto close_peer; + if (drop != 1) + FAIL("%s: want drop count 1, have %d", log_prefix, drop); + +close_peer: + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return; + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + goto detach; + + redir_to_listening(family, sotype, sock_map, verdict_map, + REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); +detach: + xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); +} + +static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); + if (err) + return; + + redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); +} + +static void test_reuseport_select_listening(int family, int sotype, + int sock_map, int verd_map, + int reuseport_prog) +{ + struct sockaddr_storage addr; + unsigned int pass; + int s, c, p, err; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + s = listen_loopback_reuseport(family, sotype, reuseport_prog); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + p = xaccept(s, NULL, NULL); + if (p < 0) + goto close_cli; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_map, &key, &pass); + if (err) + goto close_peer; + if (pass != 1) + FAIL("want pass count 1, have %d", pass); + +close_peer: + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +static void test_reuseport_select_connected(int family, int sotype, + int sock_map, int verd_map, + int reuseport_prog) +{ + struct sockaddr_storage addr; + int s, c0, c1, p0, err; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + s = listen_loopback_reuseport(family, sotype, reuseport_prog); + if (s < 0) + return; + + /* Populate sock_map[0] to avoid ENOENT on first connection */ + key = 0; + value = s; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c0 = xsocket(family, sotype, 0); + if (c0 < 0) + goto close_srv; + + err = xconnect(c0, sockaddr(&addr), len); + if (err) + goto close_cli0; + + p0 = xaccept(s, NULL, NULL); + if (err) + goto close_cli0; + + /* Update sock_map[0] to redirect to a connected socket */ + key = 0; + value = p0; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST); + if (err) + goto close_peer0; + + c1 = xsocket(family, sotype, 0); + if (c1 < 0) + goto close_peer0; + + errno = 0; + err = connect(c1, sockaddr(&addr), len); + if (!err || errno != ECONNREFUSED) + FAIL_ERRNO("connect: expected ECONNREFUSED"); + + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_map, &key, &drop); + if (err) + goto close_cli1; + if (drop != 1) + FAIL("want drop count 1, have %d", drop); + +close_cli1: + xclose(c1); +close_peer0: + xclose(p0); +close_cli0: + xclose(c0); +close_srv: + xclose(s); +} + +/* Check that redirecting across reuseport groups is not allowed. */ +static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, + int verd_map, int reuseport_prog) +{ + struct sockaddr_storage addr; + int s1, s2, c, err; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + /* Create two listeners, each in its own reuseport group */ + s1 = listen_loopback_reuseport(family, sotype, reuseport_prog); + if (s1 < 0) + return; + + s2 = listen_loopback_reuseport(family, sotype, reuseport_prog); + if (s2 < 0) + goto close_srv1; + + key = 0; + value = s1; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv2; + + key = 1; + value = s2; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + + /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ + len = sizeof(addr); + err = xgetsockname(s2, sockaddr(&addr), &len); + if (err) + goto close_srv2; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv2; + + err = connect(c, sockaddr(&addr), len); + if (err && errno != ECONNREFUSED) { + FAIL_ERRNO("connect: expected ECONNREFUSED"); + goto close_cli; + } + + /* Expect drop, can't redirect outside of reuseport group */ + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_map, &key, &drop); + if (err) + goto close_cli; + if (drop != 1) + FAIL("want drop count 1, have %d", drop); + +close_cli: + xclose(c); +close_srv2: + xclose(s2); +close_srv1: + xclose(s1); +} + +#define TEST(fn) \ + { \ + fn, #fn \ + } + +static void test_ops_cleanup(const struct bpf_map *map) +{ + const struct bpf_map_def *def; + int err, mapfd; + u32 key; + + def = bpf_map__def(map); + mapfd = bpf_map__fd(map); + + for (key = 0; key < def->max_entries; key++) { + err = bpf_map_delete_elem(mapfd, &key); + if (err && errno != EINVAL && errno != ENOENT) + FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); + } +} + +static const char *family_str(sa_family_t family) +{ + switch (family) { + case AF_INET: + return "IPv4"; + case AF_INET6: + return "IPv6"; + default: + return "unknown"; + } +} + +static const char *map_type_str(const struct bpf_map *map) +{ + const struct bpf_map_def *def; + + def = bpf_map__def(map); + if (IS_ERR(def)) + return "invalid"; + + switch (def->type) { + case BPF_MAP_TYPE_SOCKMAP: + return "sockmap"; + case BPF_MAP_TYPE_SOCKHASH: + return "sockhash"; + default: + return "unknown"; + } +} + +static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, + int family, int sotype) +{ + const struct op_test { + void (*fn)(int family, int sotype, int mapfd); + const char *name; + } tests[] = { + /* insert */ + TEST(test_insert_invalid), + TEST(test_insert_opened), + TEST(test_insert_bound), + TEST(test_insert_listening), + /* delete */ + TEST(test_delete_after_insert), + TEST(test_delete_after_close), + /* lookup */ + TEST(test_lookup_after_insert), + TEST(test_lookup_after_delete), + TEST(test_lookup_32_bit_value), + /* update */ + TEST(test_update_listening), + /* races with insert/delete */ + TEST(test_destroy_orphan_child), + TEST(test_syn_recv_insert_delete), + TEST(test_race_insert_listen), + /* child clone */ + TEST(test_clone_after_delete), + TEST(test_accept_after_delete), + TEST(test_accept_before_delete), + }; + const char *family_name, *map_name; + const struct op_test *t; + char s[MAX_TEST_NAME]; + int map_fd; + + family_name = family_str(family); + map_name = map_type_str(map); + map_fd = bpf_map__fd(map); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, + t->name); + + if (!test__start_subtest(s)) + continue; + + t->fn(family, sotype, map_fd); + test_ops_cleanup(map); + } +} + +static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family, int sotype) +{ + const struct redir_test { + void (*fn)(struct test_sockmap_listen *skel, + struct bpf_map *map, int family, int sotype); + const char *name; + } tests[] = { + TEST(test_skb_redir_to_connected), + TEST(test_skb_redir_to_listening), + TEST(test_msg_redir_to_connected), + TEST(test_msg_redir_to_listening), + }; + const char *family_name, *map_name; + const struct redir_test *t; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, + t->name); + if (!test__start_subtest(s)) + continue; + + t->fn(skel, map, family, sotype); + } +} + +static void test_reuseport(struct test_sockmap_listen *skel, + struct bpf_map *map, int family, int sotype) +{ + const struct reuseport_test { + void (*fn)(int family, int sotype, int socket_map, + int verdict_map, int reuseport_prog); + const char *name; + } tests[] = { + TEST(test_reuseport_select_listening), + TEST(test_reuseport_select_connected), + TEST(test_reuseport_mixed_groups), + }; + int socket_map, verdict_map, reuseport_prog; + const char *family_name, *map_name; + const struct reuseport_test *t; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + + socket_map = bpf_map__fd(map); + verdict_map = bpf_map__fd(skel->maps.verdict_map); + reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, + t->name); + + if (!test__start_subtest(s)) + continue; + + t->fn(family, sotype, socket_map, verdict_map, reuseport_prog); + } +} + +static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) +{ + test_ops(skel, map, family, SOCK_STREAM); + test_redir(skel, map, family, SOCK_STREAM); + test_reuseport(skel, map, family, SOCK_STREAM); +} + +void test_sockmap_listen(void) +{ + struct test_sockmap_listen *skel; + + skel = test_sockmap_listen__open_and_load(); + if (!skel) { + FAIL("skeleton open/load failed"); + return; + } + + skel->bss->test_sockmap = true; + run_tests(skel, skel->maps.sock_map, AF_INET); + run_tests(skel, skel->maps.sock_map, AF_INET6); + + skel->bss->test_sockmap = false; + run_tests(skel, skel->maps.sock_hash, AF_INET); + run_tests(skel, skel->maps.sock_hash, AF_INET6); + + test_sockmap_listen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c new file mode 100644 index 000000000000..a3a366c57ce1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare + +#include +#include +#include + +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, unsigned int); +} verdict_map SEC(".maps"); + +static volatile bool test_sockmap; /* toggled by user-space */ + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + unsigned int *count; + __u32 zero = 0; + int verdict; + + if (test_sockmap) + verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0); + else + verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0); + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +SEC("sk_msg") +int prog_msg_verdict(struct sk_msg_md *msg) +{ + unsigned int *count; + __u32 zero = 0; + int verdict; + + if (test_sockmap) + verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0); + else + verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0); + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +SEC("sk_reuseport") +int prog_reuseport(struct sk_reuseport_md *reuse) +{ + unsigned int *count; + int err, verdict; + __u32 zero = 0; + + if (test_sockmap) + err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0); + else + err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0); + verdict = err ? SK_DROP : SK_PASS; + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 3c419a2cbc44761002cc6850b3966eec96afd8f8 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Sun, 23 Feb 2020 14:12:36 -0500 Subject: tc-testing: updated tdc tests for basic filter with u32 extended match rules Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/basic.json | 198 +++++++++++++++++++++ 1 file changed, 198 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 222174a2de01..afb9187b46a7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -856,5 +856,203 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "47a0", + "name": "Add basic filter with u32 ematch u32/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "849f", + "name": "Add basic filter with u32 ematch u32/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227080/1ffff0f0 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "d288", + "name": "Add basic filter with u32 ematch u32/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0xffffffff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227788/ffffffff at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "4998", + "name": "Add basic filter with u32 ematch u32/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x77889900 0xfffff0f0 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77889900/fffff0f0 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "1f0a", + "name": "Add basic filter with u32 ematch u32/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "848e", + "name": "Add basic filter with u32 ematch u32/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 zero 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f748", + "name": "Add basic filter with u32 ematch u32/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11223344 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223344/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "55a6", + "name": "Add basic filter with u32 ematch u32/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aa00cc00/ff00ff00 at -12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7282", + "name": "Add basic filter with u32 ematch u32/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] -- cgit v1.2.3 From f09ab268bbb26d5d851636801cafe73456ff73ab Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:15 +0100 Subject: KVM: selftests: aarch64: Use stream when given I'm not sure how we ended up using printf instead of fprintf in virt_dump(). Fix it. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/processor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 86036a59a668..f9decadfbe71 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -197,7 +197,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p ptep = addr_gpa2hva(vm, pte); if (!*ptep) continue; - printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); + fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1); } #endif @@ -215,7 +215,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; - printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); + fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); } } -- cgit v1.2.3 From 10d1a71b164e497587fd4d0bbf2537568d9f01d9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:13 +0100 Subject: KVM: selftests: Remove unnecessary defines BITS_PER_LONG and friends are provided by linux/bitops.h Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util_internal.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index ac50c42750cf..2fce6750b8b3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -12,17 +12,6 @@ #define KVM_DEV_PATH "/dev/kvm" -#ifndef BITS_PER_BYTE -#define BITS_PER_BYTE 8 -#endif - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) -#endif - -#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) - struct userspace_mem_region { struct userspace_mem_region *next, *prev; struct kvm_userspace_memory_region region; -- cgit v1.2.3 From 12c0d0f6d9df3a657817d639d236f3a9755640e4 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:14 +0100 Subject: KVM: selftests: aarch64: Remove unnecessary ifdefs Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 5614222a6628..3146302ac563 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -341,9 +341,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, #ifdef __x86_64__ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); #endif -#ifdef __aarch64__ ucall_init(vm, NULL); -#endif /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); @@ -433,9 +431,6 @@ int main(int argc, char *argv[]) uint64_t phys_offset = 0; unsigned int mode; int opt, i; -#ifdef __aarch64__ - unsigned int host_ipa_limit; -#endif #ifdef USE_CLEAR_DIRTY_LOG if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) { @@ -450,13 +445,15 @@ int main(int argc, char *argv[]) #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true); - - host_ipa_limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - if (host_ipa_limit >= 52) - vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true); - if (host_ipa_limit >= 48) { - vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true); - vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); + } } #endif #ifdef __s390x__ -- cgit v1.2.3 From f832485df2d46a43c2ef10be2676e3b5b5c7e7bb Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:18 +0100 Subject: KVM: selftests: Rename vm_guest_mode_params We're going to want this name in the library code, so use a shorter name in the tests. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 34 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3146302ac563..e0f3337dfccb 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -386,15 +386,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct vm_guest_mode_params { +struct guest_mode { bool supported; bool enabled; }; -struct vm_guest_mode_params vm_guest_mode_params[NUM_VM_MODES]; +static struct guest_mode guest_modes[NUM_VM_MODES]; -#define vm_guest_mode_params_init(mode, supported, enabled) \ -({ \ - vm_guest_mode_params[mode] = (struct vm_guest_mode_params){ supported, enabled }; \ +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ }) static void help(char *name) @@ -417,7 +416,7 @@ static void help(char *name) " Guest mode IDs:\n"); for (i = 0; i < NUM_VM_MODES; ++i) { printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - vm_guest_mode_params[i].supported ? " (supported)" : ""); + guest_modes[i].supported ? " (supported)" : ""); } puts(""); exit(0); @@ -440,24 +439,25 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); + guest_mode_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ - vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); - vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true); + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); if (limit >= 52) - vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true); + guest_mode_init(VM_MODE_P52V48_64K, true, true); if (limit >= 48) { - vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true); - vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); } } #endif #ifdef __s390x__ - vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { @@ -474,13 +474,13 @@ int main(int argc, char *argv[]) case 'm': if (!mode_selected) { for (i = 0; i < NUM_VM_MODES; ++i) - vm_guest_mode_params[i].enabled = false; + guest_modes[i].enabled = false; mode_selected = true; } mode = strtoul(optarg, NULL, 10); TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); - vm_guest_mode_params[mode].enabled = true; + guest_modes[mode].enabled = true; break; case 'h': default: @@ -498,9 +498,9 @@ int main(int argc, char *argv[]) srandom(time(0)); for (i = 0; i < NUM_VM_MODES; ++i) { - if (!vm_guest_mode_params[i].enabled) + if (!guest_modes[i].enabled) continue; - TEST_ASSERT(vm_guest_mode_params[i].supported, + TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); run_test(i, iterations, interval, phys_offset); -- cgit v1.2.3 From 377a41c9ef84181bff5a3af2da9dfd21d6a08911 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:19 +0100 Subject: KVM: selftests: Introduce vm_guest_mode_params This array will allow us to easily translate modes to their parameter values. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 52 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a6dd0401eb50..1b133583d6c7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -113,6 +113,25 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; + +static const struct vm_guest_mode_params vm_guest_mode_params[] = { + { 52, 48, 0x1000, 12 }, + { 52, 48, 0x10000, 16 }, + { 48, 48, 0x1000, 12 }, + { 48, 48, 0x10000, 16 }, + { 40, 48, 0x1000, 12 }, + { 40, 48, 0x10000, 16 }, + { 0, 0, 0x1000, 12 }, +}; +_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, + "Missing new mode params?"); + /* * VM Create * @@ -144,60 +163,39 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->mode = mode; vm->type = 0; + vm->pa_bits = vm_guest_mode_params[mode].pa_bits; + vm->va_bits = vm_guest_mode_params[mode].va_bits; + vm->page_size = vm_guest_mode_params[mode].page_size; + vm->page_shift = vm_guest_mode_params[mode].page_shift; + /* Setup mode specific traits. */ switch (vm->mode) { case VM_MODE_P52V48_4K: vm->pgtable_levels = 4; - vm->pa_bits = 52; - vm->va_bits = 48; - vm->page_size = 0x1000; - vm->page_shift = 12; break; case VM_MODE_P52V48_64K: vm->pgtable_levels = 3; - vm->pa_bits = 52; - vm->va_bits = 48; - vm->page_size = 0x10000; - vm->page_shift = 16; break; case VM_MODE_P48V48_4K: vm->pgtable_levels = 4; - vm->pa_bits = 48; - vm->va_bits = 48; - vm->page_size = 0x1000; - vm->page_shift = 12; break; case VM_MODE_P48V48_64K: vm->pgtable_levels = 3; - vm->pa_bits = 48; - vm->va_bits = 48; - vm->page_size = 0x10000; - vm->page_shift = 16; break; case VM_MODE_P40V48_4K: vm->pgtable_levels = 4; - vm->pa_bits = 40; - vm->va_bits = 48; - vm->page_size = 0x1000; - vm->page_shift = 12; break; case VM_MODE_P40V48_64K: vm->pgtable_levels = 3; - vm->pa_bits = 40; - vm->va_bits = 48; - vm->page_size = 0x10000; - vm->page_shift = 16; break; case VM_MODE_PXXV48_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); TEST_ASSERT(vm->va_bits == 48, "Linear address width " "(%d bits) not supported", vm->va_bits); - vm->pgtable_levels = 4; - vm->page_size = 0x1000; - vm->page_shift = 12; DEBUG("Guest physical address width detected: %d\n", vm->pa_bits); + vm->pgtable_levels = 4; #else TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " "non-x86 platforms"); -- cgit v1.2.3 From 87a802d93e7ef55216d8884fdf7e5f491a6fe501 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:20 +0100 Subject: KVM: selftests: Introduce num-pages conversion utilities Guests and hosts don't have to have the same page size. This means calculations are necessary when selecting the number of guest pages to allocate in order to ensure the number is compatible with the host. Provide utilities to help with those calculations and apply them where appropriate. We also revert commit bffed38d4fb5 ("kvm: selftests: aarch64: dirty_log_test: fix unaligned memslot size") and then use vm_adjust_num_guest_pages() there instead. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 13 +++++---- tools/testing/selftests/kvm/include/kvm_util.h | 8 ++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 37 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index e0f3337dfccb..edc5c071bf02 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -178,12 +178,11 @@ static void *vcpu_worker(void *data) return NULL; } -static void vm_dirty_log_verify(unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) { + uint64_t step = vm_num_host_pages(mode, 1); uint64_t page; uint64_t *value_ptr; - uint64_t step = host_page_size >= guest_page_size ? 1 : - guest_page_size / host_page_size; for (page = 0; page < host_num_pages; page += step) { value_ptr = host_test_mem + page * host_page_size; @@ -289,14 +288,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (DIRTY_MEM_BITS - - vm_get_page_shift(vm))) + 16; + vm_get_page_shift(vm))) + 3; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; #endif host_page_size = getpagesize(); - host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + - !!((guest_num_pages * guest_page_size) % host_page_size); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); if (!phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - @@ -367,7 +366,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); #endif - vm_dirty_log_verify(bmap); + vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index ae0d14c2540a..1dc13bfa88b7 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -164,6 +164,14 @@ unsigned int vm_get_page_size(struct kvm_vm *vm); unsigned int vm_get_page_shift(struct kvm_vm *vm); unsigned int vm_get_max_gfn(struct kvm_vm *vm); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + return vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +} + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1b133583d6c7..67f5dc9a6a32 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -580,6 +580,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; size_t alignment; + TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, + "Number of guest pages is not compatible with the host. " + "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); + TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" " guest_paddr: 0x%lx vm->page_size: 0x%x", @@ -1701,3 +1705,36 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm) { return vm->max_gfn; } + +static unsigned int vm_calc_num_pages(unsigned int num_pages, + unsigned int page_shift, + unsigned int new_page_shift, + bool ceil) +{ + unsigned int n = 1 << (new_page_shift - page_shift); + + if (page_shift >= new_page_shift) + return num_pages * (1 << (page_shift - new_page_shift)); + + return num_pages / n + !!(ceil && num_pages % n); +} + +static inline int getpageshift(void) +{ + return __builtin_ffs(getpagesize()) - 1; +} + +unsigned int +vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + return vm_calc_num_pages(num_guest_pages, + vm_guest_mode_params[mode].page_shift, + getpageshift(), true); +} + +unsigned int +vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) +{ + return vm_calc_num_pages(num_host_pages, getpageshift(), + vm_guest_mode_params[mode].page_shift, false); +} -- cgit v1.2.3 From 025eed7b3519be30cc2310711137ab4ff827fbe3 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:27 -0800 Subject: KVM: selftests: Create a demand paging test While userfaultfd, KVM's demand paging implementation, is not specific to KVM, having a benchmark for its performance will be useful for guiding performance improvements to KVM. As a first step towards creating a userfaultfd demand paging test, create a simple memory access test, based on dirty_log_test. Reviewed-by: Oliver Upton Signed-off-by: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 + tools/testing/selftests/kvm/demand_paging_test.c | 283 +++++++++++++++++++++++ 3 files changed, 287 insertions(+) create mode 100644 tools/testing/selftests/kvm/demand_paging_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 30072c3f52fb..9619d96e15c4 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,3 +17,4 @@ /clear_dirty_log_test /dirty_log_test /kvm_create_max_vcpus +/demand_paging_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index d91c53b726e6..1bc9f41d3fcd 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -29,16 +29,19 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += dirty_log_test +TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c new file mode 100644 index 000000000000..e3d49172e2c3 --- /dev/null +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM demand paging test + * Adapted from dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 1 + +/* The memory slot index demand page */ +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* + * Guest/Host shared variables. Ensure addr_gva2hva() and/or + * sync_global_to/from_guest() are used when accessing from + * the host. READ/WRITE_ONCE() should also be used with anything + * that may change. + */ +static uint64_t host_page_size; +static uint64_t guest_page_size; +static uint64_t guest_num_pages; + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the demand paging + * memory region. + */ +static void guest_code(void) +{ + int i; + + for (i = 0; i < guest_num_pages; i++) { + uint64_t addr = guest_test_virt_mem; + + addr += i * guest_page_size; + addr &= ~(host_page_size - 1); + *(uint64_t *)addr = 0x0123456789ABCDEF; + } + + GUEST_SYNC(1); +} + +/* Points to the test VM memory region on which we are doing demand paging */ +static void *host_test_mem; +static uint64_t host_num_pages; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = data; + struct kvm_run *run; + + run = vcpu_state(vm, VCPU_ID); + + /* Let the guest access its memory */ + ret = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + if (get_ucall(vm, VCPU_ID, NULL) != UCALL_SYNC) { + TEST_ASSERT(false, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } + + return NULL; +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, + uint64_t extra_mem_pages, void *guest_code) +{ + struct kvm_vm *vm; + uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + vm_vcpu_add_default(vm, vcpuid, guest_code); + return vm; +} + +#define GUEST_MEM_SHIFT 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + +static void run_test(enum vm_guest_mode mode) +{ + pthread_t vcpu_thread; + struct kvm_vm *vm; + + /* + * We reserve page table for 2 times of extra dirty mem which + * will definitely cover the original (1G+) test range. Here + * we do the calculation with 4K page size which is the + * smallest so the page number will be enough for all archs + * (e.g., 64K page size guest will need even less memory for + * page tables). + */ + vm = create_vm(mode, VCPU_ID, + 2ul << (GUEST_MEM_SHIFT - PAGE_SHIFT_4K), + guest_code); + + guest_page_size = vm_get_page_size(vm); + /* + * A little more than 1G of guest page sized pages. Cover the + * case where the size is not aligned to 64 pages. + */ + guest_num_pages = (1ul << (GUEST_MEM_SHIFT - + vm_get_page_shift(vm))) + 16; +#ifdef __s390x__ + /* Round up to multiple of 1M (segment size) */ + guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; +#endif + + host_page_size = getpagesize(); + host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + + !!((guest_num_pages * guest_page_size) % + host_page_size); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + guest_page_size; + guest_test_phys_mem &= ~(host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + DEBUG("guest physical test memory offset: 0x%lx\n", + guest_test_phys_mem); + + + /* Add an extra memory slot for testing demand paging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, + guest_num_pages * guest_page_size, 0); + + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); +#endif +#ifdef __aarch64__ + ucall_init(vm, NULL); +#endif + + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, host_page_size); + sync_global_to_guest(vm, guest_page_size); + sync_global_to_guest(vm, guest_test_virt_mem); + sync_global_to_guest(vm, guest_num_pages); + + pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + + /* Wait for the vcpu thread to quit */ + pthread_join(vcpu_thread, NULL); + + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-m mode]\n", name); + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + bool mode_selected = false; + unsigned int mode; + int opt, i; + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hm:")) != -1) { + switch (opt) { + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i); + } + + return 0; +} -- cgit v1.2.3 From 0facf109f69b7b6309de27e1b5350859b040c1a5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 24 Feb 2020 08:35:56 +0100 Subject: selftests: introduce test for mlxsw tc flower restrictions Include test of forbidding to have drop rule on mixed-bound shared block. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/tc_flower_restrictions.sh | 100 +++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh new file mode 100755 index 000000000000..58419c3a7d99 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="shared_block_drop_test" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +switch_create() +{ + simple_if_init $swp1 192.0.2.1/24 + simple_if_init $swp2 192.0.2.2/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.2/24 + simple_if_fini $swp1 192.0.2.1/24 +} + +shared_block_drop_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mixed-bound + # shared block with a drop rule. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add drop rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to egress bound shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + vrf_prepare + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + vrf_cleanup +} + +check_tc_shblock_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From c902a52c404835b240ccf194be573ef17110a18b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 24 Feb 2020 08:35:57 +0100 Subject: selftests: pass pref and handle to devlink_trap_drop_* helpers Currently the helpers assume pref 1 and handle 101. Make that explicit and pass the values from callers. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/devlink_trap_l2_drops.sh | 28 +++++++------- .../drivers/net/mlxsw/devlink_trap_l3_drops.sh | 44 +++++++++++----------- .../drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh | 4 +- .../selftests/net/forwarding/devlink_lib.sh | 7 +++- 4 files changed, 43 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index 58cdbfb608e9..e7aecb065409 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -107,11 +107,11 @@ source_mac_is_multicast_test() RET=0 - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 log_test "Source MAC is multicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } __vlan_tag_mismatch_test() @@ -132,7 +132,7 @@ __vlan_tag_mismatch_test() $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Add PVID and make sure packets are no longer dropped. bridge vlan add vid 1 dev $swp1 pvid untagged master @@ -148,7 +148,7 @@ __vlan_tag_mismatch_test() devlink_trap_action_set $trap_name "drop" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } vlan_tag_mismatch_untagged_test() @@ -193,7 +193,7 @@ ingress_vlan_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Add the VLAN on the bridge port and make sure packets are no longer # dropped. @@ -212,7 +212,7 @@ ingress_vlan_filter_test() log_test "Ingress VLAN filter" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 bridge vlan del vid $vid dev $swp1 master bridge vlan del vid $vid dev $swp2 master @@ -237,7 +237,7 @@ __ingress_stp_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Change STP state to forwarding and make sure packets are no longer # dropped. @@ -254,7 +254,7 @@ __ingress_stp_filter_test() devlink_trap_action_set $trap_name "drop" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 bridge vlan del vid $vid dev $swp1 master bridge vlan del vid $vid dev $swp2 master @@ -308,7 +308,7 @@ port_list_is_empty_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave flood on @@ -326,7 +326,7 @@ port_list_is_empty_uc_test() log_test "Port list is empty - unicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 ip link set dev $swp1 type bridge_slave flood on } @@ -354,7 +354,7 @@ port_list_is_empty_mc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave mcast_flood on @@ -372,7 +372,7 @@ port_list_is_empty_mc_test() log_test "Port list is empty - multicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 ip link set dev $swp1 type bridge_slave mcast_flood on } @@ -401,7 +401,7 @@ port_loopback_filter_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded. ip link set dev $swp2 type bridge_slave flood on @@ -419,7 +419,7 @@ port_loopback_filter_uc_test() log_test "Port loopback filter - unicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } port_loopback_filter_test() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index d88d8e47d11b..053e5c7b303d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -176,11 +176,11 @@ non_ip_test() 00:00 de:ad:be:ef" & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Non IP" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } __uc_dip_over_mc_dmac_test() @@ -206,11 +206,11 @@ __uc_dip_over_mc_dmac_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Unicast destination IP over multicast destination MAC: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } uc_dip_over_mc_dmac_test() @@ -242,11 +242,11 @@ __sip_is_loopback_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Source IP is loopback address: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } sip_is_loopback_test() @@ -277,11 +277,11 @@ __dip_is_loopback_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Destination IP is loopback address: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } dip_is_loopback_test() @@ -313,11 +313,11 @@ __sip_is_mc_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Source IP is multicast: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } sip_is_mc_test() @@ -345,11 +345,11 @@ ipv4_sip_is_limited_bc_test() -B $h2_ipv4 -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv4 source IP is limited broadcast" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ipv4_payload_get() @@ -399,11 +399,11 @@ __ipv4_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IP header corrupted: $desc: IPv4" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ipv6_payload_get() @@ -446,11 +446,11 @@ __ipv6_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IP header corrupted: $desc: IPv6" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ip_header_corrupted_test() @@ -485,11 +485,11 @@ ipv6_mc_dip_reserved_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv6 multicast destination IP reserved scope" - devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 } ipv6_mc_dip_interface_local_scope_test() @@ -511,11 +511,11 @@ ipv6_mc_dip_interface_local_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv6 multicast destination IP interface-local scope" - devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 } __blackhole_route_test() @@ -542,10 +542,10 @@ __blackhole_route_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Blackhole route: IPv$flags" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 ip -$flags route del blackhole $subnet } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index fd19161dd4ec..e11a416323cf 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -314,11 +314,11 @@ overlay_smac_is_mc_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp1 + devlink_trap_drop_test $trap_name $group_name $swp1 101 log_test "Overlay source MAC is multicast" - devlink_trap_drop_cleanup $mz_pid $swp1 "ip" + devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101 } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 40b076983239..24798ae846de 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -373,6 +373,7 @@ devlink_trap_drop_test() local trap_name=$1; shift local group_name=$1; shift local dev=$1; shift + local handle=$1; shift # This is the common part of all the tests. It checks that stats are # initially idle, then non-idle after changing the trap action and @@ -397,7 +398,7 @@ devlink_trap_drop_test() devlink_trap_group_stats_idle_test $group_name check_err $? "Trap group stats not idle after setting action to drop" - tc_check_packets "dev $dev egress" 101 0 + tc_check_packets "dev $dev egress" $handle 0 check_err $? "Packets were not dropped" } @@ -406,7 +407,9 @@ devlink_trap_drop_cleanup() local mz_pid=$1; shift local dev=$1; shift local proto=$1; shift + local pref=$1; shift + local handle=$1; shift kill $mz_pid && wait $mz_pid &> /dev/null - tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower + tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } -- cgit v1.2.3 From e3294d2b15afdfe5e16de2b2d2bd9fae2048db55 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 24 Feb 2020 08:35:58 +0100 Subject: selftests: devlink_trap_acl_drops: Add ACL traps test Add a test to check functionality of ACL traps. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/devlink_trap_acl_drops.sh | 151 +++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh new file mode 100755 index 000000000000..26044e397157 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap ACL drops functionality over mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ingress_flow_action_drop_test + egress_flow_action_drop_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ingress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101 + + log_test "ingress_flow_action_drop" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +egress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102 + + log_test "egress_flow_action_drop" + + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 779e422d11985e408fc148f0cca0f4b403f6c5fa Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 24 Feb 2020 14:53:26 +0100 Subject: selftests/bpf: Run reuseport tests only with supported socket types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SOCKMAP and SOCKHASH map types can be used with reuseport BPF programs but don't support yet storing UDP sockets. Instead of marking UDP tests with SOCK{MAP,HASH} as skipped, don't run them at all. Skipped test might signal that the test environment is not suitable for running the test, while in reality the functionality is not implemented in the kernel yet. Before: sh# ./test_progs -t select_reuseport … #40 select_reuseport:OK Summary: 1/126 PASSED, 30 SKIPPED, 0 FAILED After: sh# ./test_progs -t select_reuseport … #40 select_reuseport:OK Summary: 1/98 PASSED, 2 SKIPPED, 0 FAILED The remaining two skipped tests are SYN cookies tests, which will be addressed in the subsequent patch. Fixes: 11318ba8cafd ("selftests/bpf: Extend SK_REUSEPORT tests to cover SOCKMAP/SOCKHASH") Reported-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200224135327.121542-1-jakub@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 68d452bb9fd9..8c41d6d63fcf 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -807,6 +807,12 @@ static void test_config(int sotype, sa_family_t family, bool inany) char s[MAX_TEST_NAME]; const struct test *t; + /* SOCKMAP/SOCKHASH don't support UDP yet */ + if (sotype == SOCK_DGRAM && + (inner_map_type == BPF_MAP_TYPE_SOCKMAP || + inner_map_type == BPF_MAP_TYPE_SOCKHASH)) + return; + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { snprintf(s, sizeof(s), "%s %s/%s %s %s", maptype_str(inner_map_type), @@ -816,13 +822,6 @@ static void test_config(int sotype, sa_family_t family, bool inany) if (!test__start_subtest(s)) continue; - if (sotype == SOCK_DGRAM && - inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { - /* SOCKMAP/SOCKHASH don't support UDP yet */ - test__skip(); - continue; - } - setup_per_test(sotype, family, inany, t->no_inner_map); t->fn(sotype, family); cleanup_per_test(t->no_inner_map); -- cgit v1.2.3 From e0360423d0204eb22f97ed89ba56da496bb9a094 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 24 Feb 2020 14:53:27 +0100 Subject: selftests/bpf: Run SYN cookies with reuseport BPF test only for TCP Currently we run SYN cookies test for all socket types and mark the test as skipped if socket type is not compatible. This causes confusion because skipped test might indicate a problem with the testing environment. Instead, run the test only for the socket type which supports SYN cookies. Also, switch to using designated initializers when setting up tests, so that we can tweak only some test parameters, leaving the rest initialized to default values. Fixes: eecd618b4516 ("selftests/bpf: Mark SYN cookie test skipped for UDP sockets") Reported-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200224135327.121542-2-jakub@cloudflare.com --- .../testing/selftests/bpf/prog_tests/select_reuseport.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 8c41d6d63fcf..a1dd13b34d4b 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -509,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family) .pass_on_failure = 0, }; - if (type != SOCK_STREAM) { - test__skip(); - return; - } - /* * +1 for TCP-SYN and * +1 for the TCP-ACK (ack the syncookie) @@ -787,7 +782,7 @@ static const char *sotype_str(int sotype) } } -#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ } +#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ } static void test_config(int sotype, sa_family_t family, bool inany) { @@ -795,12 +790,15 @@ static void test_config(int sotype, sa_family_t family, bool inany) void (*fn)(int sotype, sa_family_t family); const char *name; bool no_inner_map; + int need_sotype; } tests[] = { - TEST_INIT(test_err_inner_map, true /* no_inner_map */), + TEST_INIT(test_err_inner_map, + .no_inner_map = true), TEST_INIT(test_err_skb_data), TEST_INIT(test_err_sk_select_port), TEST_INIT(test_pass), - TEST_INIT(test_syncookie), + TEST_INIT(test_syncookie, + .need_sotype = SOCK_STREAM), TEST_INIT(test_pass_on_err), TEST_INIT(test_detach_bpf), }; @@ -814,6 +812,9 @@ static void test_config(int sotype, sa_family_t family, bool inany) return; for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (t->need_sotype && t->need_sotype != sotype) + continue; /* test not compatible with socket type */ + snprintf(s, sizeof(s), "%s %s/%s %s %s", maptype_str(inner_map_type), family_str(family), sotype_str(sotype), -- cgit v1.2.3 From 7a3c3f4440d840a6d36acbf5f012146361e2c51f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:27 +0100 Subject: selftests: netdevsim: Extend devlink trap test to include flow action cookie Extend existing devlink trap test to include metadata type for flow action cookie. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index f101ab9441e2..437d32bd4cfd 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -103,6 +103,11 @@ trap_metadata_test() for trap_name in $(devlink_traps_get); do devlink_trap_metadata_test $trap_name "input_port" check_err $? "Input port not reported as metadata of trap $trap_name" + if [ $trap_name == "ingress_flow_action_drop" ] || + [ $trap_name == "egress_flow_action_drop" ]; then + devlink_trap_metadata_test $trap_name "flow_action_cookie" + check_err $? "Flow action cookie not reported as metadata of trap $trap_name" + fi done log_test "Trap metadata" -- cgit v1.2.3 From 9fb156bb82a33f01708fbbb8828836a4219efc3d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 24 Feb 2020 16:08:47 -0800 Subject: selftests/bpf: Print backtrace on SIGSEGV in test_progs Due to various bugs in tests clean up code (usually), if host system is misconfigured, it happens that test_progs will just crash in the middle of running a test with little to no indication of where and why the crash happened. For cases where coredump is not readily available (e.g., inside a CI), it's very helpful to have a stack trace, which lead to crash, to be printed out. This change adds a signal handler that will capture and print out symbolized backtrace: $ sudo ./test_progs -t mmap test_mmap:PASS:skel_open_and_load 0 nsec test_mmap:PASS:bss_mmap 0 nsec test_mmap:PASS:data_mmap 0 nsec Caught signal #11! Stack trace: ./test_progs(crash_handler+0x18)[0x42a888] /lib64/libpthread.so.0(+0xf5d0)[0x7f2aab5175d0] ./test_progs(test_mmap+0x3c0)[0x41f0a0] ./test_progs(main+0x160)[0x407d10] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2aab15d3d5] ./test_progs[0x407ebc] [1] 1988412 segmentation fault (core dumped) sudo ./test_progs -t mmap Unfortunately, glibc's symbolization support is unable to symbolize static functions, only global ones will be present in stack trace. But it's still a step forward without adding extra libraries to get a better symbolization. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225000847.3965188-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2a583196fa51..50c63c21e6fd 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,7 +20,7 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index bab1e6f1d8f1..a969c77e9456 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -6,6 +6,8 @@ #include "bpf_rlimit.h" #include #include +#include +#include /* backtrace */ /* defined in test_progs.h */ struct test_env env = {}; @@ -617,6 +619,23 @@ int cd_flavor_subdir(const char *exec_name) return chdir(flavor); } +#define MAX_BACKTRACE_SZ 128 +void crash_handler(int signum) +{ + void *bt[MAX_BACKTRACE_SZ]; + size_t sz; + + sz = backtrace(bt, ARRAY_SIZE(bt)); + + if (env.test) + dump_test_log(env.test, true); + if (env.stdout) + stdio_restore(); + + fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); + backtrace_symbols_fd(bt, sz, STDERR_FILENO); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -624,8 +643,14 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; + struct sigaction sigact = { + .sa_handler = crash_handler, + .sa_flags = SA_RESETHAND, + }; int err, i; + sigaction(SIGSEGV, &sigact, NULL); + err = argp_parse(&argp, argc, argv, 0, NULL, &env); if (err) return err; -- cgit v1.2.3 From 3494bec0f6ac8ac06e0ad7c35933db345b2c5a83 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Tue, 25 Feb 2020 14:34:41 -0800 Subject: bpftool: Support struct_ops, tracing, ext prog types Add support for prog types that were added to kernel but not present in bpftool yet: struct_ops, tracing, ext prog types and corresponding section names. Before: # bpftool p l ... 184: type 26 name test_subprog3 tag dda135a7dc0daf54 gpl loaded_at 2020-02-25T13:28:33-0800 uid 0 xlated 112B jited 103B memlock 4096B map_ids 136 btf_id 85 185: type 28 name new_get_skb_len tag d2de5b87d8e5dc49 gpl loaded_at 2020-02-25T13:28:33-0800 uid 0 xlated 72B jited 69B memlock 4096B map_ids 136 btf_id 85 After: # bpftool p l ... 184: tracing name test_subprog3 tag dda135a7dc0daf54 gpl loaded_at 2020-02-25T13:28:33-0800 uid 0 xlated 112B jited 103B memlock 4096B map_ids 136 btf_id 85 185: ext name new_get_skb_len tag d2de5b87d8e5dc49 gpl loaded_at 2020-02-25T13:28:33-0800 uid 0 xlated 72B jited 69B memlock 4096B map_ids 136 btf_id 85 Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225223441.689109-1-rdna@fb.com --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 3 ++- tools/bpf/bpftool/bash-completion/bpftool | 3 ++- tools/bpf/bpftool/main.h | 3 +++ tools/bpf/bpftool/prog.c | 4 ++-- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 64ddf8a4c518..46862e85fed2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -42,7 +42,8 @@ PROG COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** +| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **struct_ops** | **fentry** | **fexit** | **freplace** | } | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 754d8395e451..ad4133b1f0cf 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -469,7 +469,8 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt" -- \ + cgroup/setsockopt struct_ops \ + fentry fexit freplace" -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 4e75b58d3989..724ef9d941d3 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -76,6 +76,9 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", }; extern const char * const map_type_name[]; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index b352ab041160..1996e67a2f00 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1573,8 +1573,8 @@ static int do_help(int argc, char **argv) " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n" - " cgroup/recvmsg6 | cgroup/getsockopt |\n" - " cgroup/setsockopt }\n" + " cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n" + " struct_ops | fentry | fexit | freplace }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" " " HELP_SPEC_OPTIONS "\n" -- cgit v1.2.3 From 6b52ca44e8af2c508ffbcd03261cf849a740756a Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 26 Feb 2020 17:59:35 +0100 Subject: bpftool: Move out sections to separate functions Remove all calls of print_end_then_start_section function and for loops out from the do_probe function. Instead, provide separate functions for each section (like i.e. section_helpers) which are called in do_probe. This change is motivated by better readability. Signed-off-by: Michal Rostecki Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200226165941.6379-2-mrostecki@opensuse.org --- tools/bpf/bpftool/feature.c | 219 +++++++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 93 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 941873d778d8..345e4a2b4f53 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -112,18 +112,12 @@ print_start_section(const char *json_title, const char *plain_title, } } -static void -print_end_then_start_section(const char *json_title, const char *plain_title, - const char *define_comment, - const char *define_prefix) +static void print_end_section(void) { if (json_output) jsonw_end_object(json_wtr); else printf("\n"); - - print_start_section(json_title, plain_title, define_comment, - define_prefix); } /* Probing functions */ @@ -584,13 +578,130 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex) res, define_prefix); } +static void +section_system_config(enum probe_component target, const char *define_prefix) +{ + switch (target) { + case COMPONENT_KERNEL: + case COMPONENT_UNSPEC: + if (define_prefix) + break; + + print_start_section("system_config", + "Scanning system configuration...", + NULL, /* define_comment never used here */ + NULL); /* define_prefix always NULL here */ + if (check_procfs()) { + probe_unprivileged_disabled(); + probe_jit_enable(); + probe_jit_harden(); + probe_jit_kallsyms(); + probe_jit_limit(); + } else { + p_info("/* procfs not mounted, skipping related probes */"); + } + probe_kernel_image_config(); + print_end_section(); + break; + default: + break; + } +} + +static bool section_syscall_config(const char *define_prefix) +{ + bool res; + + print_start_section("syscall_config", + "Scanning system call availability...", + "/*** System call availability ***/", + define_prefix); + res = probe_bpf_syscall(define_prefix); + print_end_section(); + + return res; +} + +static void +section_program_types(bool *supported_types, const char *define_prefix, + __u32 ifindex) +{ + unsigned int i; + + print_start_section("program_types", + "Scanning eBPF program types...", + "/*** eBPF program types ***/", + define_prefix); + + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_prog_type(i, supported_types, define_prefix, ifindex); + + print_end_section(); +} + +static void section_map_types(const char *define_prefix, __u32 ifindex) +{ + unsigned int i; + + print_start_section("map_types", + "Scanning eBPF map types...", + "/*** eBPF map types ***/", + define_prefix); + + for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) + probe_map_type(i, define_prefix, ifindex); + + print_end_section(); +} + +static void +section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) +{ + unsigned int i; + + print_start_section("helpers", + "Scanning eBPF helper functions...", + "/*** eBPF helper functions ***/", + define_prefix); + + if (define_prefix) + printf("/*\n" + " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n" + " * to determine if is available for ,\n" + " * e.g.\n" + " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n" + " * // do stuff with this helper\n" + " * #elif\n" + " * // use a workaround\n" + " * #endif\n" + " */\n" + "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n" + " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", + define_prefix, define_prefix, define_prefix, + define_prefix); + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_helpers_for_progtype(i, supported_types[i], + define_prefix, ifindex); + + print_end_section(); +} + +static void section_misc(const char *define_prefix, __u32 ifindex) +{ + print_start_section("misc", + "Scanning miscellaneous eBPF features...", + "/*** eBPF misc features ***/", + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); + print_end_section(); +} + static int do_probe(int argc, char **argv) { enum probe_component target = COMPONENT_UNSPEC; const char *define_prefix = NULL; bool supported_types[128] = {}; __u32 ifindex = 0; - unsigned int i; char *ifname; /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN). @@ -658,97 +769,19 @@ static int do_probe(int argc, char **argv) jsonw_start_object(json_wtr); } - switch (target) { - case COMPONENT_KERNEL: - case COMPONENT_UNSPEC: - if (define_prefix) - break; - - print_start_section("system_config", - "Scanning system configuration...", - NULL, /* define_comment never used here */ - NULL); /* define_prefix always NULL here */ - if (check_procfs()) { - probe_unprivileged_disabled(); - probe_jit_enable(); - probe_jit_harden(); - probe_jit_kallsyms(); - probe_jit_limit(); - } else { - p_info("/* procfs not mounted, skipping related probes */"); - } - probe_kernel_image_config(); - if (json_output) - jsonw_end_object(json_wtr); - else - printf("\n"); - break; - default: - break; - } - - print_start_section("syscall_config", - "Scanning system call availability...", - "/*** System call availability ***/", - define_prefix); - - if (!probe_bpf_syscall(define_prefix)) + section_system_config(target, define_prefix); + if (!section_syscall_config(define_prefix)) /* bpf() syscall unavailable, don't probe other BPF features */ goto exit_close_json; - - print_end_then_start_section("program_types", - "Scanning eBPF program types...", - "/*** eBPF program types ***/", - define_prefix); - - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) - probe_prog_type(i, supported_types, define_prefix, ifindex); - - print_end_then_start_section("map_types", - "Scanning eBPF map types...", - "/*** eBPF map types ***/", - define_prefix); - - for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) - probe_map_type(i, define_prefix, ifindex); - - print_end_then_start_section("helpers", - "Scanning eBPF helper functions...", - "/*** eBPF helper functions ***/", - define_prefix); - - if (define_prefix) - printf("/*\n" - " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n" - " * to determine if is available for ,\n" - " * e.g.\n" - " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n" - " * // do stuff with this helper\n" - " * #elif\n" - " * // use a workaround\n" - " * #endif\n" - " */\n" - "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n" - " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", - define_prefix, define_prefix, define_prefix, - define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) - probe_helpers_for_progtype(i, supported_types[i], - define_prefix, ifindex); - - print_end_then_start_section("misc", - "Scanning miscellaneous eBPF features...", - "/*** eBPF misc features ***/", - define_prefix); - probe_large_insn_limit(define_prefix, ifindex); + section_program_types(supported_types, define_prefix, ifindex); + section_map_types(define_prefix, ifindex); + section_helpers(supported_types, define_prefix, ifindex); + section_misc(define_prefix, ifindex); exit_close_json: - if (json_output) { - /* End current "section" of probes */ - jsonw_end_object(json_wtr); + if (json_output) /* End root object */ jsonw_end_object(json_wtr); - } return 0; } -- cgit v1.2.3 From 368cb0e7cdb5e67ad8c0cd52f1d71341c42a7e4b Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 26 Feb 2020 17:59:36 +0100 Subject: bpftool: Make probes which emit dmesg warnings optional Probes related to bpf_probe_write_user and bpf_trace_printk helpers emit dmesg warnings which might be confusing for people running bpftool on production environments. This change filters them out by default and introduces the new positional argument "full" which enables all available probes. Signed-off-by: Michal Rostecki Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200226165941.6379-3-mrostecki@opensuse.org --- tools/bpf/bpftool/feature.c | 70 +++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 345e4a2b4f53..88718ee6a438 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -513,14 +513,39 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, define_prefix); } +static void +probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, + const char *define_prefix, unsigned int id, + const char *ptype_name, __u32 ifindex) +{ + bool res; + + if (!supported_type) + res = false; + else + res = bpf_probe_helper(id, prog_type, ifindex); + + if (json_output) { + if (res) + jsonw_string(json_wtr, helper_name[id]); + } else if (define_prefix) { + printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n", + define_prefix, ptype_name, helper_name[id], + res ? "1" : "0"); + } else { + if (res) + printf("\n\t- %s", helper_name[id]); + } +} + static void probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, - const char *define_prefix, __u32 ifindex) + const char *define_prefix, bool full_mode, + __u32 ifindex) { const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; - bool res; if (ifindex) /* Only test helpers for offload-able program types */ @@ -542,21 +567,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } for (id = 1; id < ARRAY_SIZE(helper_name); id++) { - if (!supported_type) - res = false; - else - res = bpf_probe_helper(id, prog_type, ifindex); - - if (json_output) { - if (res) - jsonw_string(json_wtr, helper_name[id]); - } else if (define_prefix) { - printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n", - define_prefix, ptype_name, helper_name[id], - res ? "1" : "0"); - } else { - if (res) - printf("\n\t- %s", helper_name[id]); + /* Skip helper functions which emit dmesg messages when not in + * the full mode. + */ + switch (id) { + case BPF_FUNC_trace_printk: + case BPF_FUNC_probe_write_user: + if (!full_mode) + continue; + /* fallthrough */ + default: + probe_helper_for_progtype(prog_type, supported_type, + define_prefix, id, ptype_name, + ifindex); } } @@ -655,7 +678,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex) } static void -section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) +section_helpers(bool *supported_types, const char *define_prefix, + bool full_mode, __u32 ifindex) { unsigned int i; @@ -681,7 +705,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) define_prefix); for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) probe_helpers_for_progtype(i, supported_types[i], - define_prefix, ifindex); + define_prefix, full_mode, ifindex); print_end_section(); } @@ -701,6 +725,7 @@ static int do_probe(int argc, char **argv) enum probe_component target = COMPONENT_UNSPEC; const char *define_prefix = NULL; bool supported_types[128] = {}; + bool full_mode = false; __u32 ifindex = 0; char *ifname; @@ -740,6 +765,9 @@ static int do_probe(int argc, char **argv) strerror(errno)); return -1; } + } else if (is_prefix(*argv, "full")) { + full_mode = true; + NEXT_ARG(); } else if (is_prefix(*argv, "macros") && !define_prefix) { define_prefix = ""; NEXT_ARG(); @@ -775,7 +803,7 @@ static int do_probe(int argc, char **argv) goto exit_close_json; section_program_types(supported_types, define_prefix, ifindex); section_map_types(define_prefix, ifindex); - section_helpers(supported_types, define_prefix, ifindex); + section_helpers(supported_types, define_prefix, full_mode, ifindex); section_misc(define_prefix, ifindex); exit_close_json: @@ -794,7 +822,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n" + "Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n" " %s %s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" -- cgit v1.2.3 From bcdacab6e70cf90bf3d8db94ae9226ff5117a61e Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 26 Feb 2020 17:59:37 +0100 Subject: bpftool: Update documentation of "bpftool feature" command Update documentation of "bpftool feature" command with information about new arguments: "full". Signed-off-by: Michal Rostecki Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200226165941.6379-4-mrostecki@opensuse.org --- tools/bpf/bpftool/Documentation/bpftool-feature.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 4d08f35034a2..b04156cfd7a3 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -19,19 +19,24 @@ SYNOPSIS FEATURE COMMANDS ================ -| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]] +| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]] | **bpftool** **feature help** | | *COMPONENT* := { **kernel** | **dev** *NAME* } DESCRIPTION =========== - **bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]] + **bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]] Probe the running kernel and dump a number of eBPF-related parameters, such as availability of the **bpf()** system call, JIT status, eBPF program types availability, eBPF helper functions availability, and more. + By default, bpftool **does not run probes** for + **bpf_probe_write_user**\ () and **bpf_trace_printk**\() + helpers which print warnings to kernel logs. To enable them + and run all probes, the **full** keyword should be used. + If the **macros** keyword (but not the **-j** option) is passed, a subset of the output is dumped as a list of **#define** macros that are ready to be included in a C @@ -44,16 +49,12 @@ DESCRIPTION Keyword **kernel** can be omitted. If no probe target is specified, probing the kernel is the default behaviour. - Note that when probed, some eBPF helpers (e.g. - **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may - print warnings to kernel logs. - - **bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]] + **bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]] Probe network device for supported eBPF features and dump results to the console. - The two keywords **macros** and **prefix** have the same - role as when probing the kernel. + The keywords **full**, **macros** and **prefix** have the + same role as when probing the kernel. **bpftool feature help** Print short help message. -- cgit v1.2.3 From ad92b12a6e0ebfb6d32693f374ac8572f527c0c1 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 26 Feb 2020 17:59:38 +0100 Subject: bpftool: Update bash completion for "bpftool feature" command Update bash completion for "bpftool feature" command with the new argument: "full". Signed-off-by: Michal Rostecki Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200226165941.6379-5-mrostecki@opensuse.org --- tools/bpf/bpftool/bash-completion/bpftool | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index ad4133b1f0cf..f2838a658339 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -984,11 +984,12 @@ _bpftool() probe) [[ $prev == "prefix" ]] && return 0 if _bpftool_search_list 'macros'; then - COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) ) + _bpftool_once_attr 'prefix' else COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) ) fi _bpftool_one_of_list 'kernel dev' + _bpftool_once_attr 'full' return 0 ;; *) -- cgit v1.2.3 From 736332740e295d9b6fc524f0447448f6089911d9 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 26 Feb 2020 17:59:39 +0100 Subject: selftests/bpf: Add test for "bpftool feature" command Add Python module with tests for "bpftool feature" command, which mainly checks whether the "full" option is working properly. Signed-off-by: Michal Rostecki Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200226165941.6379-6-mrostecki@opensuse.org --- tools/testing/selftests/.gitignore | 5 +- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_bpftool.py | 178 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_bpftool.sh | 5 + 4 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_bpftool.py create mode 100755 tools/testing/selftests/bpf/test_bpftool.sh (limited to 'tools') diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index 61df01cdf0b2..304fdf1a21dc 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -3,4 +3,7 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio tpm2/SpaceTest.log -tpm2/*.pyc + +# Python bytecode and cache +__pycache__/ +*.py[cod] diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 50c63c21e6fd..2d7f5df33f04 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -62,7 +62,8 @@ TEST_PROGS := test_kmod.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ - test_bpftool_build.sh + test_bpftool_build.sh \ + test_bpftool.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py new file mode 100644 index 000000000000..4fed2dc25c0a --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool.py @@ -0,0 +1,178 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020 SUSE LLC. + +import collections +import functools +import json +import os +import socket +import subprocess +import unittest + + +# Add the source tree of bpftool and /usr/local/sbin to PATH +cur_dir = os.path.dirname(os.path.realpath(__file__)) +bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..", + "tools", "bpf", "bpftool")) +os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"] + + +class IfaceNotFoundError(Exception): + pass + + +class UnprivilegedUserError(Exception): + pass + + +def _bpftool(args, json=True): + _args = ["bpftool"] + if json: + _args.append("-j") + _args.extend(args) + + return subprocess.check_output(_args) + + +def bpftool(args): + return _bpftool(args, json=False).decode("utf-8") + + +def bpftool_json(args): + res = _bpftool(args) + return json.loads(res) + + +def get_default_iface(): + for iface in socket.if_nameindex(): + if iface[1] != "lo": + return iface[1] + raise IfaceNotFoundError("Could not find any network interface to probe") + + +def default_iface(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + iface = get_default_iface() + return f(*args, iface, **kwargs) + return wrapper + + +class TestBpftool(unittest.TestCase): + @classmethod + def setUpClass(cls): + if os.getuid() != 0: + raise UnprivilegedUserError( + "This test suite needs root privileges") + + @default_iface + def test_feature_dev_json(self, iface): + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + expected_keys = [ + "syscall_config", + "program_types", + "map_types", + "helpers", + "misc", + ] + + res = bpftool_json(["feature", "probe", "dev", iface]) + # Check if the result has all expected keys. + self.assertCountEqual(res.keys(), expected_keys) + # Check if unexpected helpers are not included in helpers probes + # result. + for helpers in res["helpers"].values(): + for unexpected_helper in unexpected_helpers: + self.assertNotIn(unexpected_helper, helpers) + + def test_feature_kernel(self): + test_cases = [ + bpftool_json(["feature", "probe", "kernel"]), + bpftool_json(["feature", "probe"]), + bpftool_json(["feature"]), + ] + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + expected_keys = [ + "syscall_config", + "system_config", + "program_types", + "map_types", + "helpers", + "misc", + ] + + for tc in test_cases: + # Check if the result has all expected keys. + self.assertCountEqual(tc.keys(), expected_keys) + # Check if unexpected helpers are not included in helpers probes + # result. + for helpers in tc["helpers"].values(): + for unexpected_helper in unexpected_helpers: + self.assertNotIn(unexpected_helper, helpers) + + def test_feature_kernel_full(self): + test_cases = [ + bpftool_json(["feature", "probe", "kernel", "full"]), + bpftool_json(["feature", "probe", "full"]), + ] + expected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + + for tc in test_cases: + # Check if expected helpers are included at least once in any + # helpers list for any program type. Unfortunately we cannot assume + # that they will be included in all program types or a specific + # subset of programs. It depends on the kernel version and + # configuration. + found_helpers = False + + for helpers in tc["helpers"].values(): + if all(expected_helper in helpers + for expected_helper in expected_helpers): + found_helpers = True + break + + self.assertTrue(found_helpers) + + def test_feature_kernel_full_vs_not_full(self): + full_res = bpftool_json(["feature", "probe", "full"]) + not_full_res = bpftool_json(["feature", "probe"]) + not_full_set = set() + full_set = set() + + for helpers in full_res["helpers"].values(): + for helper in helpers: + full_set.add(helper) + + for helpers in not_full_res["helpers"].values(): + for helper in helpers: + not_full_set.add(helper) + + self.assertCountEqual(full_set - not_full_set, + {"bpf_probe_write_user", "bpf_trace_printk"}) + self.assertCountEqual(not_full_set - full_set, set()) + + def test_feature_macros(self): + expected_patterns = [ + r"/\*\*\* System call availability \*\*\*/", + r"#define HAVE_BPF_SYSCALL", + r"/\*\*\* eBPF program types \*\*\*/", + r"#define HAVE.*PROG_TYPE", + r"/\*\*\* eBPF map types \*\*\*/", + r"#define HAVE.*MAP_TYPE", + r"/\*\*\* eBPF helper functions \*\*\*/", + r"#define HAVE.*HELPER", + r"/\*\*\* eBPF misc features \*\*\*/", + ] + + res = bpftool(["feature", "probe", "macros"]) + for pattern in expected_patterns: + self.assertRegex(res, pattern) diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh new file mode 100755 index 000000000000..66690778e36d --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020 SUSE LLC. + +python3 -m unittest -v test_bpftool.TestBpftool -- cgit v1.2.3 From 34eee836a9dd3e1987c10ed6afc7ece4131a993d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 27 Feb 2020 12:25:47 -0500 Subject: radix tree test suite: Support kmem_cache alignment The radix tree doesn't use alignment, so the argument was ignored. The maple tree needs its nodes to be aligned, so we need to pay attention to the alignment argument. Also change the types of 'size' and 'align' to unsigned int to match commit f4957d5bd0916. Signed-off-by: Matthew Wilcox (Oracle) --- tools/testing/radix-tree/linux.c | 32 ++++++++++++++++++++------------ tools/testing/radix-tree/linux/slab.h | 6 +++--- 2 files changed, 23 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 44a0d1ad4408..2d9c59df60de 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -19,37 +19,44 @@ int test_verbose; struct kmem_cache { pthread_mutex_t lock; - int size; + unsigned int size; + unsigned int align; int nr_objs; void *objs; void (*ctor)(void *); }; -void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) +void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp) { - struct radix_tree_node *node; + void *p; - if (!(flags & __GFP_DIRECT_RECLAIM)) + if (!(gfp & __GFP_DIRECT_RECLAIM)) return NULL; pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs) { + struct radix_tree_node *node = cachep->objs; cachep->nr_objs--; - node = cachep->objs; cachep->objs = node->parent; pthread_mutex_unlock(&cachep->lock); node->parent = NULL; + p = node; } else { pthread_mutex_unlock(&cachep->lock); - node = malloc(cachep->size); + if (cachep->align) + posix_memalign(&p, cachep->align, cachep->size); + else + p = malloc(cachep->size); if (cachep->ctor) - cachep->ctor(node); + cachep->ctor(p); + else if (gfp & __GFP_ZERO) + memset(p, 0, cachep->size); } uatomic_inc(&nr_allocated); if (kmalloc_verbose) - printf("Allocating %p from slab\n", node); - return node; + printf("Allocating %p from slab\n", p); + return p; } void kmem_cache_free(struct kmem_cache *cachep, void *objp) @@ -59,7 +66,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) if (kmalloc_verbose) printf("Freeing %p to slab\n", objp); pthread_mutex_lock(&cachep->lock); - if (cachep->nr_objs > 10) { + if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); } else { @@ -98,13 +105,14 @@ void kfree(void *p) } struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t offset, - unsigned long flags, void (*ctor)(void *)) +kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) { struct kmem_cache *ret = malloc(sizeof(*ret)); pthread_mutex_init(&ret->lock, NULL); ret->size = size; + ret->align = align; ret->nr_objs = 0; ret->objs = NULL; ret->ctor = ctor; diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index a037def0dec6..2958830ce4d7 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -20,8 +20,8 @@ static inline void *kzalloc(size_t size, gfp_t gfp) void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t offset, - unsigned long flags, void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, + unsigned int align, unsigned int flags, + void (*ctor)(void *)); #endif /* SLAB_H */ -- cgit v1.2.3 From 4113b04823948e40eda1c54dfadadad4bc01dcdb Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 27 Feb 2020 08:50:06 +0100 Subject: selftests: forwarding: lib.sh: Add start_tcp_traffic Extract a helper __start_traffic() configurable by protocol type. Allow passing through extra mausezahn arguments. Add a wrapper, start_tcp_traffic(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 2f5da414aaa7..f80f384978ce 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1132,18 +1132,29 @@ flood_test() flood_multicast_test $br_port $host1_if $host2_if } -start_traffic() +__start_traffic() { + local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ - -a own -b $dmac -t udp -q & + -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic() +{ + __start_traffic udp "$@" +} + +start_tcp_traffic() +{ + __start_traffic tcp "$@" +} + stop_traffic() { # Suppress noise from killing mausezahn. -- cgit v1.2.3 From 3de611b507628abac081630d8180a9a369c83668 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 27 Feb 2020 08:50:07 +0100 Subject: selftests: mlxsw: Add a RED selftest This tests that below the queue minimum length, there is no dropping / marking, and above max, everything is dropped / marked. The test is structured as a core file with topology and test code, and three wrappers: one for RED used as a root Qdisc, and two for testing (W)RED under PRIO and ETS. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/sch_red_core.sh | 499 +++++++++++++++++++++ .../selftests/drivers/net/mlxsw/sch_red_ets.sh | 83 ++++ .../selftests/drivers/net/mlxsw/sch_red_prio.sh | 5 + .../selftests/drivers/net/mlxsw/sch_red_root.sh | 60 +++ tools/testing/selftests/net/forwarding/lib.sh | 10 + 5 files changed, 657 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh new file mode 100644 index 000000000000..ebf7752f6d93 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -0,0 +1,499 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a >1Gbps stream of traffic from H1, to the switch, which +# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the +# switch and forwarded to the port under test $swp3, which is also 1Gbps. +# +# This way, $swp3 should be 100% filled with traffic without any of it spilling +# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That +# is what H2 is used for--it sends the extra traffic to create backlog. +# +# A RED Qdisc is installed on $swp3. The configuration is such that the minimum +# and maximum size are 1 byte apart, so there is a very clear border under which +# no marking or dropping takes place, and above which everything is marked or +# dropped. +# +# The test uses the buffer build-up behavior to test the installed RED. +# +# In order to test WRED, $swp3 actually contains RED under PRIO, with two +# different configurations. Traffic is prioritized using 802.1p and relies on +# the implicit mlxsw configuration, where packet priority is taken 1:1 from the +# 802.1p marking. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | $h1.11 + | | | $h2.11 + | +# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | | +# | | | | | | | | +# | \______ ______/ | | \______ ______/ | +# | \ / | | \ / | +# | + $h1 | | + $h2 | +# +-------------|------------+ +-------------|------------+ +# | >1Gbps | +# +-------------|------------------------------------------------|------------+ +# | SW + $swp1 + $swp2 | +# | _______/ \___________ ___________/ \_______ | +# | / \ / \ | +# | +-|-----------------+ | +-|-----------------+ | | +# | | + $swp1.10 | | | + $swp2.10 | | | +# | | | | .-------------+ $swp5.10 | | | +# | | BR1_10 | | | | | | | +# | | | | | | BR2_10 | | | +# | | + $swp2.10 | | | | | | | +# | +-|-----------------+ | | | + $swp3.10 | | | +# | | | | +-|-----------------+ | | +# | | +-----------------|-+ | | +-----------------|-+ | +# | | | $swp1.11 + | | | | $swp2.11 + | | +# | | | | | .-----------------+ $swp5.11 | | +# | | | BR1_11 | | | | | | | +# | | | | | | | | BR2_11 | | +# | | | $swp2.11 + | | | | | | | +# | | +-----------------|-+ | | | | $swp3.11 + | | +# | | | | | | +-----------------|-+ | +# | \_______ ___________/ | | \___________ _______/ | +# | \ / \ / \ / | +# | + $swp4 + $swp5 + $swp3 | +# +-------------|----------------------|-------------------------|------------+ +# | | | 1Gbps +# \________1Gbps_________/ | +# +----------------------------|------------+ +# | H3 + $h3 | +# | _____________________/ \_______ | +# | / \ | +# | | | | +# | + $h3.10 $h3.11 + | +# | 192.0.2.3/28 192.0.2.19/28 | +# +-----------------------------------------+ + +NUM_NETIFS=8 +CHECK_TC="yes" +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + simple_if_init $dev + mtu_set $dev 10000 + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + ip link set dev $dev.11 type vlan egress 0:1 +} + +host_destroy() +{ + local dev=$1; shift + + vlan_destroy $dev 11 + vlan_destroy $dev 10 + mtu_restore $dev + simple_if_fini $dev +} + +h1_create() +{ + host_create $h1 1 +} + +h1_destroy() +{ + host_destroy $h1 +} + +h2_create() +{ + host_create $h2 2 + + # Some of the tests in this suite use multicast traffic. As this traffic + # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus + # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the + # intended destination) and $swp5 (which is intended as ingress for + # another stream of traffic). + # + # This is generally not a problem, but if the $swp5 throughput is lower + # than $swp2 throughput, there will be a build-up at $swp5. That may + # cause packets to fail to queue up at $swp3 due to shared buffer + # quotas, and the test to spuriously fail. + # + # Prevent this by setting the speed of $h2 to 1Gbps. + + ethtool -s $h2 speed 1000 autoneg off +} + +h2_destroy() +{ + ethtool -s $h2 autoneg on + host_destroy $h2 +} + +h3_create() +{ + host_create $h3 3 + ethtool -s $h3 speed 1000 autoneg off +} + +h3_destroy() +{ + ethtool -s $h3 autoneg on + host_destroy $h3 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br1_10 type bridge + ip link add dev br1_11 type bridge + + ip link add dev br2_10 type bridge + ip link add dev br2_11 type bridge + + for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do + ip link set dev $intf up + mtu_set $intf 10000 + done + + for intf in $swp1 $swp4; do + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br1_$vlan + ip link set dev $intf.$vlan up + done + done + + for intf in $swp2 $swp3 $swp5; do + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br2_$vlan + ip link set dev $intf.$vlan up + done + done + + ip link set dev $swp4.10 type vlan egress 0:0 + ip link set dev $swp4.11 type vlan egress 0:1 + for intf in $swp1 $swp2 $swp5; do + for vlan in 10 11; do + ip link set dev $intf.$vlan type vlan ingress 0:0 1:1 + done + done + + for intf in $swp2 $swp3 $swp4 $swp5; do + ethtool -s $intf speed 1000 autoneg off + done + + ip link set dev br1_10 up + ip link set dev br1_11 up + ip link set dev br2_10 up + ip link set dev br2_11 up + + local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_set $swp3 8 $size +} + +switch_destroy() +{ + local intf + local vlan + + devlink_port_pool_th_restore $swp3 8 + + tc qdisc del dev $swp3 root 2>/dev/null + + ip link set dev br2_11 down + ip link set dev br2_10 down + ip link set dev br1_11 down + ip link set dev br1_10 down + + for intf in $swp5 $swp4 $swp3 $swp2; do + ethtool -s $intf autoneg on + done + + for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do + for vlan in 11 10; do + ip link set dev $intf.$vlan down + ip link set dev $intf.$vlan nomaster + vlan_destroy $intf $vlan + done + + mtu_restore $intf + ip link set dev $intf down + done + + ip link del dev br2_11 + ip link del dev br2_10 + ip link del dev br1_11 + ip link del dev br1_10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h3_mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10" + ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11" + ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10" + ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11" +} + +get_tc() +{ + local vlan=$1; shift + + echo $((vlan - 10)) +} + +get_qdisc_handle() +{ + local vlan=$1; shift + + local tc=$(get_tc $vlan) + local band=$((8 - tc)) + + # Handle is 107: for TC1, 108: for TC0. + echo "10$band:" +} + +get_qdisc_backlog() +{ + local vlan=$1; shift + + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog +} + +get_mc_transmit_queue() +{ + local vlan=$1; shift + + local tc=$(($(get_tc $vlan) + 8)) + ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc +} + +get_nmarked() +{ + local vlan=$1; shift + + ethtool_stats_get $swp3 ecn_marked +} + +get_qdisc_npackets() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 10 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. +build_backlog() +{ + local vlan=$1; shift + local size=$1; shift + local proto=$1; shift + + local tc=$((vlan - 10)) + local band=$((8 - tc)) + local cur=-1 + local i=0 + + while :; do + local cur=$(busywait 1100 until_counter_is $((cur + 1)) \ + get_qdisc_backlog $vlan) + local diff=$((size - cur)) + local pkts=$(((diff + 7999) / 8000)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \ + -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \ + -t $proto -q -c $pkts "$@" + done +} + +check_marking() +{ + local vlan=$1; shift + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets $vlan) + local nmarked_0=$(get_nmarked $vlan) + sleep 5 + local npackets_1=$(get_qdisc_npackets $vlan) + local nmarked_1=$(get_nmarked $vlan) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Main stream. + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): ECN backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "TC $((vlan - 10)): ECN backlog > limit" + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "TC $((vlan - 10)): ECN backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_red_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + local diff=$((limit - backlog)) + pct=$((100 * diff / limit)) + ((0 <= pct && pct <= 5)) + check_err $? "backlog $backlog / $limit expected <= 5% distance" + log_test "TC $((vlan - 10)): RED backlog > limit" + + stop_traffic + sleep 1 +} + +do_mc_backlog_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc + start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc + + qbl=$(busywait 5000 until_counter_is 500000 \ + get_qdisc_backlog $vlan) + check_err $? "Could not build MC backlog" + + # Verify that we actually see the backlog on BUM TC. Do a busywait as + # well, performance blips might cause false fail. + local ebl + ebl=$(busywait 5000 until_counter_is 500000 \ + get_mc_transmit_queue $vlan) + check_err $? "MC backlog reported by qdisc not visible in ethtool" + + stop_traffic + stop_traffic + + log_test "TC $((vlan - 10)): Qdisc reports MC backlog" +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh new file mode 100755 index 000000000000..af83efe9ccf1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + red_test + mc_backlog_test +" +: ${QDISC:=ets} +source sch_red_core.sh + +# do_ecn_test first build 2/3 of the requested backlog and expects no marking, +# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and +# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do +# see (do not see) marking, it is actually due to the configuration of that one +# TC, and not due to configuration of the other TC leaking over. +BACKLOG1=200000 +BACKLOG2=500000 + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 root handle 10: $QDISC \ + bands 8 priomap 7 6 5 4 3 2 1 0 + tc qdisc add dev $swp3 parent 10:8 handle 108: red \ + limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ + probability 1.0 avpkt 8000 burst 38 "${args[@]}" + tc qdisc add dev $swp3 parent 10:7 handle 107: red \ + limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ + probability 1.0 avpkt 8000 burst 63 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 10:7 + tc qdisc del dev $swp3 parent 10:8 + tc qdisc del dev $swp3 root +} + +ecn_test() +{ + install_qdisc ecn + + do_ecn_test 10 $BACKLOG1 + do_ecn_test 11 $BACKLOG2 + + uninstall_qdisc +} + +red_test() +{ + install_qdisc + + do_red_test 10 $BACKLOG1 + do_red_test 11 $BACKLOG2 + + uninstall_qdisc +} + +mc_backlog_test() +{ + install_qdisc + + # Note that the backlog numbers here do not correspond to RED + # configuration, but are arbitrary. + do_mc_backlog_test 10 $BACKLOG1 + do_mc_backlog_test 11 $BACKLOG2 + + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +bail_on_lldpad +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh new file mode 100755 index 000000000000..76820a0e9a1b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC=prio +source sch_red_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh new file mode 100755 index 000000000000..b2217493a88e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + red_test + mc_backlog_test +" +source sch_red_core.sh + +BACKLOG=300000 + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 root handle 108: red \ + limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \ + probability 1.0 avpkt 8000 burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 root +} + +ecn_test() +{ + install_qdisc ecn + do_ecn_test 10 $BACKLOG + uninstall_qdisc +} + +red_test() +{ + install_qdisc + do_red_test 10 $BACKLOG + uninstall_qdisc +} + +mc_backlog_test() +{ + install_qdisc + # Note that the backlog value here does not correspond to RED + # configuration, but is arbitrary. + do_mc_backlog_test 10 $BACKLOG + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +bail_on_lldpad +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index f80f384978ce..aff3178edf6d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -607,6 +607,16 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +qdisc_stats_get() +{ + local dev=$1; shift + local handle=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" \ + | jq '.[] | select(.handle == "'"$handle"'") | '"$selector" +} + humanize() { local speed=$1; shift -- cgit v1.2.3 From c84e903f6227a4e7cf9a925fc491a4206009cd75 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Feb 2020 08:50:08 +0100 Subject: selftests: add egress redirect test to mlxsw tc flower restrictions Include test of forbidding to have redirect rule on egress-bound block. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/tc_flower_restrictions.sh | 60 +++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh index 58419c3a7d99..67e0c25adcee 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh @@ -3,7 +3,10 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="shared_block_drop_test" +ALL_TESTS=" + shared_block_drop_test + egress_redirect_test +" NUM_NETIFS=2 source $lib_dir/tc_common.sh @@ -69,6 +72,61 @@ shared_block_drop_test() log_test "shared block drop" } +egress_redirect_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mirred redirect on + # egress-bound block. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_err $? "Failed to add redirect rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound shared block" + + tc qdisc del dev $swp2 clsact + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound block" + + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + setup_prepare() { swp1=${NETIFS[p1]} -- cgit v1.2.3 From ab2b8ab253d17a81a3d905d4c3e215391c725771 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Feb 2020 08:50:09 +0100 Subject: selftests: add a mirror test to mlxsw tc flower restrictions Include test of forbidding to have multiple mirror actions. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/tc_flower_restrictions.sh | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh index 67e0c25adcee..68c80d0ec1ec 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh @@ -6,6 +6,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" shared_block_drop_test egress_redirect_test + multi_mirror_test " NUM_NETIFS=2 @@ -127,6 +128,33 @@ egress_redirect_test() log_test "shared block drop" } +multi_mirror_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have multiple mirror + # actions in a single rule. + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 + check_err $? "Failed to add rule with single mirror action" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 \ + action mirred egress mirror dev $swp1 + check_fail $? "Incorrect success to add rule with two mirror actions" + + tc qdisc del dev $swp1 clsact + + log_test "multi mirror" +} + setup_prepare() { swp1=${NETIFS[p1]} -- cgit v1.2.3 From 5d66773f4158894f2266398ecbdaf2f94a89348b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 27 Feb 2020 08:50:10 +0100 Subject: selftests: devlink_trap_l3_drops: Avoid race condition The test checks that packets are trapped when they should egress a router interface (RIF) that has become disabled. This is a temporary state in a RIF's deletion sequence. Currently, the test deletes the RIF by flushing all the IP addresses configured on the associated netdev (br0). However, this is racy, as this also flushes all the routes pointing to the netdev and if the routes are deleted from the device before the RIF is disabled, then no packets will try to egress the disabled RIF and the trap will not be triggered. Instead, trigger the deletion of the RIF by unlinking the mlxsw port from the bridge that is backing the RIF. Unlike before, this will not cause the kernel to delete the routes pointing to the bridge. Note that due to current mlxsw locking scheme the RIF is always deleted first, but this is going to change. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 053e5c7b303d..616f47d86a61 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -641,13 +641,9 @@ erif_disabled_test() mz_pid=$! sleep 5 - # In order to see this trap we need a route that points to disabled RIF. - # When ipv6 address is flushed, there is a delay and the routes are - # deleted before the RIF and we cannot get state that we have route - # to disabled RIF. - # Delete IPv6 address first and then check this trap with flushing IPv4. - ip -6 add flush dev br0 - ip -4 add flush dev br0 + # Unlinking the port from the bridge will disable the RIF associated + # with br0 as it is no longer an upper of any mlxsw port. + ip link set dev $rp1 nomaster t1_packets=$(devlink_trap_rx_packets_get $trap_name) t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) @@ -659,7 +655,6 @@ erif_disabled_test() log_test "Egress RIF disabled" kill $mz_pid && wait $mz_pid &> /dev/null - ip link set dev $rp1 nomaster __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge devlink_trap_action_set $trap_name "drop" -- cgit v1.2.3 From 0c22f993c91a7c215466b9aa833d25df05367dee Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Feb 2020 08:50:11 +0100 Subject: selftests: mlxsw: Use busywait helper in blackhole routes test Blackhole routes test uses offload indication checks. Use busywait helper and wait until the routes offload indication is set or fail if it reaches timeout. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh | 5 +++-- tools/testing/selftests/net/forwarding/lib.sh | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh index 5ba5bef44d5b..bdffe698e1d1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh @@ -45,6 +45,7 @@ ALL_TESTS=" blackhole_ipv6 " NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -123,7 +124,7 @@ blackhole_ipv4() skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \ action pass - ip -4 route show 198.51.100.0/30 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30 check_err $? "route not marked as offloaded when should" ping_do $h1 198.51.100.1 @@ -147,7 +148,7 @@ blackhole_ipv6() skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \ ip_proto icmpv6 action pass - ip -6 route show 2001:db8:2::/120 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120 check_err $? "route not marked as offloaded when should" ping6_do $h1 2001:db8:2::1 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index aff3178edf6d..5ea33c72f468 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -248,6 +248,11 @@ busywait() done } +wait_for_offload() +{ + "$@" | grep -q offload +} + until_counter_is() { local value=$1; shift -- cgit v1.2.3 From 05ef614c559ee8a496d590f0f468f9f883a06ca9 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Feb 2020 08:50:12 +0100 Subject: selftests: mlxsw: Use busywait helper in vxlan test Vxlan test uses offload indication checks. Use a busywait helper and wait until the offload indication is set or fail if it reaches timeout. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 206 ++++++++++++--------- tools/testing/selftests/net/forwarding/lib.sh | 22 +++ 2 files changed, 139 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 15eb0dc9a685..729a86cc4ede 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -9,6 +9,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="sanitization_test offload_indication_test \ sanitization_vlan_aware_test offload_indication_vlan_aware_test" NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh setup_prepare() @@ -470,8 +471,8 @@ offload_indication_fdb_flood_test() bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ + bridge fdb show brport vxlan0 check_err $? bridge fdb del 00:00:00:00:00:00 dev vxlan0 self @@ -486,11 +487,11 @@ offload_indication_fdb_bridge_test() bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - initial state" @@ -500,9 +501,9 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? log_test "vxlan entry offload indication - after removal from bridge" @@ -511,11 +512,11 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - after re-add to bridge" @@ -525,9 +526,9 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? log_test "vxlan entry offload indication - after removal from vxlan" @@ -536,11 +537,11 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - after re-add to vxlan" @@ -558,27 +559,32 @@ offload_indication_decap_route_test() { RET=0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 down - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan1 down - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - vxlan device down" RET=0 ip link set dev vxlan1 up - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 up - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vxlan device up" @@ -586,11 +592,13 @@ offload_indication_decap_route_test() RET=0 ip address delete 198.51.100.1/32 dev lo - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? ip address add 198.51.100.1/32 dev lo - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - add local route" @@ -598,16 +606,19 @@ offload_indication_decap_route_test() RET=0 ip link set dev $swp1 nomaster - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev $swp2 nomaster - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - local ports enslavement" @@ -615,12 +626,14 @@ offload_indication_decap_route_test() RET=0 ip link del dev br0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev br1 - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - bridge device deletion" @@ -632,16 +645,19 @@ offload_indication_decap_route_test() ip link set dev $swp2 master br1 ip link set dev vxlan0 master br0 ip link set dev vxlan1 master br1 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan1 - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - vxlan device deletion" @@ -656,12 +672,15 @@ check_fdb_offloaded() local mac=00:11:22:33:44:55 local zmac=00:00:00:00:00:00 - bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 check_err $? - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 check_err $? } @@ -672,13 +691,15 @@ check_vxlan_fdb_not_offloaded() bridge fdb show dev vxlan0 | grep $mac | grep -q self check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 + check_err $? bridge fdb show dev vxlan0 | grep $zmac | grep -q self check_err $? - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? } check_bridge_fdb_not_offloaded() @@ -688,8 +709,9 @@ check_bridge_fdb_not_offloaded() bridge fdb show dev vxlan0 | grep $mac | grep -q master check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 + check_err $? } __offload_indication_join_vxlan_first() @@ -771,12 +793,14 @@ __offload_indication_join_vxlan_last() ip link set dev $swp1 master br0 - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? ip link set dev vxlan0 master br0 - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 check_err $? log_test "offload indication - attach vxlan last" @@ -866,8 +890,9 @@ sanitization_vlan_aware_test() ip link set dev $swp1 master br0 &> /dev/null check_fail $? - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vlan-aware - failed enslavement to bridge due to conflict" @@ -929,11 +954,11 @@ offload_indication_vlan_aware_fdb_test() bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ dst 198.51.100.2 vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - initial state" @@ -943,9 +968,9 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? log_test "vxlan entry offload indication - after removal from bridge" @@ -954,11 +979,11 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - after re-add to bridge" @@ -968,9 +993,9 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? log_test "vxlan entry offload indication - after removal from vxlan" @@ -979,11 +1004,11 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - after re-add to vxlan" @@ -995,28 +1020,31 @@ offload_indication_vlan_aware_decap_route_test() { RET=0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag on one VxLAN device and make sure route is still # marked as offloaded bridge vlan add vid 10 dev vxlan10 untagged - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag on second VxLAN device and make sure route is no # longer marked as offloaded bridge vlan add vid 20 dev vxlan20 untagged - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? # Toggle PVID flag back and make sure route is marked as offloaded bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vni map/unmap" @@ -1069,33 +1097,33 @@ offload_indication_vlan_aware_l3vni_test() ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 check_err $? "vxlan tunnel not offloaded when should" # Configure a VLAN interface and make sure tunnel is offloaded ip link add link br0 name br10 up type vlan id 10 sysctl_set net.ipv6.conf.br10.disable_ipv6 0 ip -6 address add 2001:db8:1::1/64 dev br10 - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 check_err $? "vxlan tunnel not offloaded when should" # Unlink the VXLAN device, make sure tunnel is no longer offloaded, # then add it back to the bridge and make sure it is offloaded ip link set dev vxlan0 nomaster - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload - check_fail $? "vxlan tunnel offloaded after unlinked from bridge" + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded after unlinked from bridge" ip link set dev vxlan0 master br0 - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload - check_fail $? "vxlan tunnel offloaded despite no matching vid" + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded despite no matching vid" bridge vlan add dev vxlan0 vid 10 pvid untagged - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 check_err $? "vxlan tunnel not offloaded after adding vid" log_test "vxlan - l3 vni" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 5ea33c72f468..83fd15e3e545 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -248,6 +248,28 @@ busywait() done } +not() +{ + "$@" + [[ $? != 0 ]] +} + +grep_bridge_fdb() +{ + local addr=$1; shift + local word + local flag + + if [ "$1" == "self" ] || [ "$1" == "master" ]; then + word=$1; shift + if [ "$1" == "-v" ]; then + flag=$1; shift + fi + fi + + $@ | grep $addr | grep $flag "$word" +} + wait_for_offload() { "$@" | grep -q offload -- cgit v1.2.3 From 1cbe65e09b5a6379dd31b85879a7d8fded3fdc42 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Feb 2020 08:50:13 +0100 Subject: selftests: mlxsw: Use busywait helper in rtnetlink test Rtnetlink test uses offload indication checks. Use a busywait helper and wait until the offload indication is set or fail if it reaches timeout. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 68 ++++++++++++++-------- 1 file changed, 44 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 5c39e5f6a480..f4031002d5e9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -32,6 +32,7 @@ ALL_TESTS=" devlink_reload_test " NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh @@ -360,20 +361,24 @@ vlan_rif_refcount_test() ip link add link br0 name br0.10 up type vlan id 10 ip -6 address add 2001:db8:1::1/64 dev br0.10 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was not created before adding port to vlan" bridge vlan add vid 10 dev $swp1 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was destroyed after adding port to vlan" bridge vlan del vid 10 dev $swp1 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was destroyed after removing port from vlan" ip link set dev $swp1 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload - check_fail $? "vlan rif was not destroyed after unlinking port from bridge" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was not destroyed after unlinking port from bridge" log_test "vlan rif refcount" @@ -401,22 +406,28 @@ subport_rif_refcount_test() ip -6 address add 2001:db8:1::1/64 dev bond1 ip -6 address add 2001:db8:2::1/64 dev bond1.10 - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 check_err $? "subport rif was not created on lag device" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 check_err $? "subport rif was not created on vlan device" ip link set dev $swp1 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 check_err $? "subport rif of lag device was destroyed when should not" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 check_err $? "subport rif of vlan device was destroyed when should not" ip link set dev $swp2 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload - check_fail $? "subport rif of lag device was not destroyed when should" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload - check_fail $? "subport rif of vlan device was not destroyed when should" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was not destroyed when should" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 + check_err $? "subport rif of vlan device was not destroyed when should" log_test "subport rif refcount" @@ -575,7 +586,8 @@ bridge_extern_learn_test() bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn - bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + bridge fdb show brport $swp1 de:ad:be:ef:13:37 check_err $? "fdb entry not marked as offloaded when should" log_test "externally learned fdb entry" @@ -595,9 +607,11 @@ neigh_offload_test() ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \ dev $swp1 - ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 neigh show dev $swp1 192.0.2.2 check_err $? "ipv4 neigh entry not marked as offloaded when should" - ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 neigh show dev $swp1 2001:db8:1::2 check_err $? "ipv6 neigh entry not marked as offloaded when should" log_test "neighbour offload indication" @@ -623,25 +637,31 @@ nexthop_offload_test() ip -6 route add 2001:db8:2::/64 vrf v$swp1 \ nexthop via 2001:db8:1::2 dev $swp1 - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 check_err $? "ipv4 nexthop not marked as offloaded when should" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 check_err $? "ipv6 nexthop not marked as offloaded when should" ip link set dev $swp2 down sleep 1 - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload - check_fail $? "ipv4 nexthop marked as offloaded when should not" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload - check_fail $? "ipv6 nexthop marked as offloaded when should not" + busywait "$TIMEOUT" not wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 + check_err $? "ipv4 nexthop marked as offloaded when should not" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 + check_err $? "ipv6 nexthop marked as offloaded when should not" ip link set dev $swp2 up setup_wait - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 check_err $? "ipv4 nexthop not marked as offloaded after neigh add" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 check_err $? "ipv6 nexthop not marked as offloaded after neigh add" log_test "nexthop offload indication" -- cgit v1.2.3 From 6697b51ed340f8cd3d983418bd4e4eab30810b5d Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 27 Feb 2020 08:50:14 +0100 Subject: selftests: mlxsw: Add shared buffer configuration test Test physical ports' shared buffer configuration options using random values related to a specific configuration option. There are 3 configuration options: pool, TC bind and portpool. Each sub-test, test a different configuration option and random the related values as the follow: * For pools, pool's size will be randomized. * For TC bind, pool number and threshold will be randomized. * For portpools, threshold will be randomized. Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/mlxsw/sharedbuffer_configuration.py | 416 +++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py new file mode 100755 index 000000000000..0d4b9327c9b3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py @@ -0,0 +1,416 @@ +#!/usr/bin/python +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import json as j +import random + + +class SkipTest(Exception): + pass + + +class RandomValuePicker: + """ + Class for storing shared buffer configuration. Can handle 3 different + objects, pool, tcbind and portpool. Provide an interface to get random + values for a specific object type as the follow: + 1. Pool: + - random size + + 2. TcBind: + - random pool number + - random threshold + + 3. PortPool: + - random threshold + """ + def __init__(self, pools): + self._pools = [] + for pool in pools: + self._pools.append(pool) + + def _cell_size(self): + return self._pools[0]["cell_size"] + + def _get_static_size(self, th): + # For threshold of 16, this works out to be about 12MB on Spectrum-1, + # and about 17MB on Spectrum-2. + return th * 8000 * self._cell_size() + + def _get_size(self): + return self._get_static_size(16) + + def _get_thtype(self): + return "static" + + def _get_th(self, pool): + # Threshold value could be any integer between 3 to 16 + th = random.randint(3, 16) + if pool["thtype"] == "dynamic": + return th + else: + return self._get_static_size(th) + + def _get_pool(self, direction): + ing_pools = [] + egr_pools = [] + for pool in self._pools: + if pool["type"] == "ingress": + ing_pools.append(pool) + else: + egr_pools.append(pool) + if direction == "ingress": + arr = ing_pools + else: + arr = egr_pools + return arr[random.randint(0, len(arr) - 1)] + + def get_value(self, objid): + if isinstance(objid, Pool): + if objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + else: + return (self._get_size(), self._get_thtype()) + if isinstance(objid, TcBind): + if objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + else: + pool = self._get_pool(objid["type"]) + th = self._get_th(pool) + pool_n = pool["pool"] + return (pool_n, th) + if isinstance(objid, PortPool): + pool_n = objid["pool"] + pool = self._pools[pool_n] + assert pool["pool"] == pool_n + th = self._get_th(pool) + return (th,) + + +class RecordValuePickerException(Exception): + pass + + +class RecordValuePicker: + """ + Class for storing shared buffer configuration. Can handle 2 different + objects, pool and tcbind. Provide an interface to get the stored values per + object type. + """ + def __init__(self, objlist): + self._recs = [] + for item in objlist: + self._recs.append({"objid": item, "value": item.var_tuple()}) + + def get_value(self, objid): + if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + if isinstance(objid, TcBind) and objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + for rec in self._recs: + if rec["objid"].weak_eq(objid): + return rec["value"] + raise RecordValuePickerException() + + +def run_cmd(cmd, json=False): + out = subprocess.check_output(cmd, shell=True) + if json: + return j.loads(out) + return out + + +def run_json_cmd(cmd): + return run_cmd(cmd, json=True) + + +def log_test(test_name, err_msg=None): + if err_msg: + print("\t%s" % err_msg) + print("TEST: %-80s [FAIL]" % test_name) + else: + print("TEST: %-80s [ OK ]" % test_name) + + +class CommonItem(dict): + varitems = [] + + def var_tuple(self): + ret = [] + self.varitems.sort() + for key in self.varitems: + ret.append(self[key]) + return tuple(ret) + + def weak_eq(self, other): + for key in self: + if key in self.varitems: + continue + if self[key] != other[key]: + return False + return True + + +class CommonList(list): + def get_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + return item + return None + + def del_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + self.remove(item) + + +class Pool(CommonItem): + varitems = ["size", "thtype"] + + def dl_set(self, dlname, size, thtype): + run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"], + self["pool"], + size, thtype)) + + +class PoolList(CommonList): + pass + + +def get_pools(dlname, direction=None): + d = run_json_cmd("devlink sb pool show -j") + pools = PoolList() + for pooldict in d["pool"][dlname]: + if not direction or direction == pooldict["type"]: + pools.append(Pool(pooldict)) + return pools + + +def do_check_pools(dlname, pools, vp): + for pool in pools: + pre_pools = get_pools(dlname) + try: + (size, thtype) = vp.get_value(pool) + except SkipTest: + continue + pool.dl_set(dlname, size, thtype) + post_pools = get_pools(dlname) + pool = post_pools.get_by(pool) + + err_msg = None + if pool["size"] != size: + err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size) + if pool["thtype"] != thtype: + err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype) + + pre_pools.del_by(pool) + post_pools.del_by(pool) + if pre_pools != post_pools: + err_msg = "Other pool setup changed as well" + log_test("pool {} of sb {} set verification".format(pool["pool"], + pool["sb"]), err_msg) + + +def check_pools(dlname, pools): + # Save defaults + record_vp = RecordValuePicker(pools) + + # For each pool, set random size and static threshold type + do_check_pools(dlname, pools, RandomValuePicker(pools)) + + # Restore defaults + do_check_pools(dlname, pools, record_vp) + + +class TcBind(CommonItem): + varitems = ["pool", "threshold"] + + def __init__(self, port, d): + super(TcBind, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, pool, th): + run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["tc"], + self["type"], + pool, th)) + + +class TcBindList(CommonList): + pass + + +def get_tcbinds(ports, verify_existence=False): + d = run_json_cmd("devlink sb tc bind show -j -n") + tcbinds = TcBindList() + for port in ports: + err_msg = None + if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0: + err_msg = "No tc bind for port" + else: + for tcbinddict in d["tc_bind"][port.name]: + tcbinds.append(TcBind(port, tcbinddict)) + if verify_existence: + log_test("tc bind existence for port {} verification".format(port.name), err_msg) + return tcbinds + + +def do_check_tcbind(ports, tcbinds, vp): + for tcbind in tcbinds: + pre_tcbinds = get_tcbinds(ports) + try: + (pool, th) = vp.get_value(tcbind) + except SkipTest: + continue + tcbind.dl_set(pool, th) + post_tcbinds = get_tcbinds(ports) + tcbind = post_tcbinds.get_by(tcbind) + + err_msg = None + if tcbind["pool"] != pool: + err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool) + if tcbind["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th) + + pre_tcbinds.del_by(tcbind) + post_tcbinds.del_by(tcbind) + if pre_tcbinds != post_tcbinds: + err_msg = "Other tc bind setup changed as well" + log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"], + tcbind["tc"], + tcbind["sb"]), err_msg) + + +def check_tcbind(dlname, ports, pools): + tcbinds = get_tcbinds(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(tcbinds) + + # Bind each port and unicast TC (TCs < 8) to a random pool and a random + # threshold + do_check_tcbind(ports, tcbinds, RandomValuePicker(pools)) + + # Restore defaults + do_check_tcbind(ports, tcbinds, record_vp) + + +class PortPool(CommonItem): + varitems = ["threshold"] + + def __init__(self, port, d): + super(PortPool, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, th): + run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["pool"], th)) + + +class PortPoolList(CommonList): + pass + + +def get_portpools(ports, verify_existence=False): + d = run_json_cmd("devlink sb port pool -j -n") + portpools = PortPoolList() + for port in ports: + err_msg = None + if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0: + err_msg = "No port pool for port" + else: + for portpooldict in d["port_pool"][port.name]: + portpools.append(PortPool(port, portpooldict)) + if verify_existence: + log_test("port pool existence for port {} verification".format(port.name), err_msg) + return portpools + + +def do_check_portpool(ports, portpools, vp): + for portpool in portpools: + pre_portpools = get_portpools(ports) + (th,) = vp.get_value(portpool) + portpool.dl_set(th) + post_portpools = get_portpools(ports) + portpool = post_portpools.get_by(portpool) + + err_msg = None + if portpool["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th) + + pre_portpools.del_by(portpool) + post_portpools.del_by(portpool) + if pre_portpools != post_portpools: + err_msg = "Other port pool setup changed as well" + log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"], + portpool["pool"], + portpool["sb"]), err_msg) + + +def check_portpool(dlname, ports, pools): + portpools = get_portpools(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(portpools) + + # For each port pool, set a random threshold + do_check_portpool(ports, portpools, RandomValuePicker(pools)) + + # Restore defaults + do_check_portpool(ports, portpools, record_vp) + + +class Port: + def __init__(self, name): + self.name = name + + +class PortList(list): + pass + + +def get_ports(dlname): + d = run_json_cmd("devlink port show -j") + ports = PortList() + for name in d["port"]: + if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical": + ports.append(Port(name)) + return ports + + +def get_device(): + devices_info = run_json_cmd("devlink -j dev info")["info"] + for d in devices_info: + if "mlxsw_spectrum" in devices_info[d]["driver"]: + return d + return None + + +class UnavailableDevlinkNameException(Exception): + pass + + +def test_sb_configuration(): + # Use static seed + random.seed(0) + + dlname = get_device() + if not dlname: + raise UnavailableDevlinkNameException() + + ports = get_ports(dlname) + pools = get_pools(dlname) + + check_pools(dlname, pools) + check_tcbind(dlname, ports, pools) + check_portpool(dlname, ports, pools) + + +test_sb_configuration() -- cgit v1.2.3 From 552ec3d9d2aa81681ac27d510bd11d29a5522840 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 27 Feb 2020 08:50:15 +0100 Subject: selftests: devlink_lib: Check devlink info command is supported Sanity check for devlink info command. Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 24798ae846de..07e360e2f275 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then exit 1 fi +devlink dev help 2>&1 | grep info &> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing devlink dev info support" + exit 1 +fi + ############################################################################## # Devlink helpers -- cgit v1.2.3 From 9fb74734f4f896c03e1ce31278b2d4f8fe2ac033 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 27 Feb 2020 08:50:16 +0100 Subject: selftests: devlink_lib: Add devlink port helpers Add two devlink port helpers: * devlink port get by netdev * devlink cpu port get Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 07e360e2f275..0df6d8942721 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -419,3 +419,19 @@ devlink_trap_drop_cleanup() kill $mz_pid && wait $mz_pid &> /dev/null tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } + +devlink_port_by_netdev() +{ + local if_name=$1 + + devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]' +} + +devlink_cpu_port_get() +{ + local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" | + grep cpu | cut -d/ -f3 | cut -d: -f1 | + sed -n '1p') + + echo "$DEVLINK_DEV/$cpu_dl_port_num" +} -- cgit v1.2.3 From 4240dbd8f384cbe227ee654b46f2dd711326eec1 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 27 Feb 2020 08:50:17 +0100 Subject: selftests: mlxsw: Add mlxsw lib Add mlxsw lib for common defines, helpers etc. Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh new file mode 100644 index 000000000000..cbe50f260a40 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +if [[ ! -v MLXSW_CHIP ]]; then + MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]') + if [ -z "$MLXSW_CHIP" ]; then + echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command" + exit 1 + fi +fi -- cgit v1.2.3 From a865ad9996039ea7be932917e5f08daceb1c3f4b Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 27 Feb 2020 08:50:18 +0100 Subject: selftests: mlxsw: Add shared buffer traffic test Test the max shared buffer occupancy for port's pool and port's TC's (using different types of packets). Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/sharedbuffer.sh | 222 +++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh new file mode 100755 index 000000000000..58f3a05f08af --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -0,0 +1,222 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + port_pool_test + port_tc_ip_test + port_tc_arp_test +" + +NUM_NETIFS=2 +source ../../../net/forwarding/lib.sh +source ../../../net/forwarding/devlink_lib.sh +source mlxsw_lib.sh + +SB_POOL_ING=0 +SB_POOL_EGR_CPU=10 + +SB_ITC_CPU_IP=3 +SB_ITC_CPU_ARP=2 +SB_ITC=0 + +h1_create() +{ + simple_if_init $h1 192.0.1.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.1.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.1.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.1.2/24 +} + +sb_occ_pool_check() +{ + local dl_port=$1; shift + local pool=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_itc_check() +{ + local dl_port=$1; shift + local itc=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_etc_check() +{ + local dl_port=$1; shift + local etc=$1; shift + local exp_max_occ=$1; shift + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +port_pool_test() +{ + local exp_max_occ=288 + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) + check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress pool" + + RET=0 + max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) + check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress pool" + + RET=0 + max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) + check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress pool" +} + +port_tc_ip_test() +{ + local exp_max_occ=288 + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress TC - IP packet" + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - IP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - IP packet" +} + +port_tc_arp_test() +{ + local exp_max_occ=96 + local max_occ + + if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then + exp_max_occ=144 + fi + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress TC - ARP packet" + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - ARP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - ARP packet" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + dl_port1=$(devlink_port_by_netdev $h1) + dl_port2=$(devlink_port_by_netdev $h2) + + cpu_dl_port=$(devlink_cpu_port_get) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From abfce9e0620216ba8098346172165370a875e3e5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Feb 2020 08:50:19 +0100 Subject: selftests: mlxsw: Reduce running time using offload indication After adding a given number of flower rules for different IPv6 addresses, the test generates traffic and ensures that each packet is received, which is time-consuming. Instead, test the offload indication of the tc flower rules and reduce the running time by half. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/tc_flower_scale.sh | 31 +++++++++------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index a6d733d2a4b4..cc0f07e72cf2 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -2,9 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 # Test for resource limit of offloaded flower rules. The test adds a given -# number of flower matches for different IPv6 addresses, then generates traffic, -# and ensures each was hit exactly once. This file contains functions to set up -# a testing topology and run the test, and is meant to be sourced from a test +# number of flower matches for different IPv6 addresses, then check the offload +# indication for all of the tc flower rules. This file contains functions to set +# up a testing topology and run the test, and is meant to be sourced from a test # script that calls the testing routine with a given number of rules. TC_FLOWER_NUM_NETIFS=2 @@ -94,22 +94,15 @@ __tc_flower_test() tc_flower_rules_create $count $should_fail - for ((i = 0; i < count; ++i)); do - $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \ - -A 2001:db8:2::1 \ - -B $(tc_flower_addr $i) - done - - MISMATCHES=$( - tc -j -s filter show dev $h2 ingress | - jq -r '[ .[] | select(.kind == "flower") | .options | - values as $rule | .actions[].stats.packets | - select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] | - join(", ")' - ) - - test -z "$MISMATCHES" - check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES" + offload_count=$(tc -j -s filter show dev $h2 ingress | + jq -r '[ .[] | select(.kind == "flower") | + .options | .in_hw ]' | jq .[] | wc -l) + [[ $((offload_count - 1)) -eq $count ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Offload mismatch" + else + check_err_fail $should_fail $? "Offload more than expacted" + fi } tc_flower_test() -- cgit v1.2.3 From e781eedae215ed42d6967d396f64ffad8e427a39 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Feb 2020 08:50:20 +0100 Subject: selftests: mlxsw: Reduce router scale running time using offload indication Currently, the test inserts X /32 routes and for each route it is testing that a packet sent from the first host is received by the second host, which is very time-consuming. Instead only validate the offload flag of each route and get the same result. Wait between the creation of the routes and the offload validation in order to make sure that all the routes were successfully offloaded. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/router_scale.sh | 53 ++++++---------------- 1 file changed, 14 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh index d231649b4f01..e93878d42596 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -2,16 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 ROUTER_NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms router_h1_create() { simple_if_init $h1 192.0.1.1/24 - ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1 } router_h1_destroy() { - ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1 simple_if_fini $h1 192.0.1.1/24 } @@ -64,13 +63,15 @@ router_setup_prepare() router_create } -router_offload_validate() +wait_for_routes() { - local route_count=$1 - local offloaded_count + local t0=$1; shift + local route_count=$1; shift - offloaded_count=$(ip route | grep -o 'offload' | wc -l) - [[ $offloaded_count -ge $route_count ]] + local t1=$(ip route | grep -o 'offload' | wc -l) + local delta=$((t1 - t0)) + echo $delta + [[ $delta -ge $route_count ]] } router_routes_create() @@ -90,8 +91,8 @@ router_routes_create() break 3 fi - echo route add 193.${i}.${j}.${k}/32 via \ - 192.0.2.1 dev $rp2 >> $ROUTE_FILE + echo route add 193.${i}.${j}.${k}/32 dev $rp2 \ + >> $ROUTE_FILE ((count++)) done done @@ -111,45 +112,19 @@ router_test() { local route_count=$1 local should_fail=$2 - local count=0 + local delta RET=0 + local t0=$(ip route | grep -o 'offload' | wc -l) router_routes_create $route_count + delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count) - router_offload_validate $route_count - check_err_fail $should_fail $? "Offload of $route_count routes" + check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta." if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then return fi - tc filter add dev $h2 ingress protocol ip pref 1 flower \ - skip_sw dst_ip 193.0.0.0/8 action drop - - for i in {0..255} - do - for j in {0..255} - do - for k in {0..255} - do - if [[ $count -eq $route_count ]]; then - break 3 - fi - - $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \ - -A 192.0.1.1 -B 193.${i}.${j}.${k} \ - -t ip -q - ((count++)) - done - done - done - - tc_check_packets "dev $h2 ingress" 1 $route_count - check_err $? "Offload mismatch" - - tc filter del dev $h2 ingress protocol ip pref 1 flower \ - skip_sw dst_ip 193.0.0.0/8 action drop - router_routes_destroy } -- cgit v1.2.3 From 3eba4137130af19543f1c0fb3169cf92715c38ff Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 27 Feb 2020 08:50:21 +0100 Subject: selftests: mlxsw: resource_scale: Invoke for Spectrum-3 The scale test for Spectrum-2 should be invoked for Spectrum-2 and Spectrum-3. Add the appropriate device ID. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 7b2acba82a49..fd583a171db7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -8,8 +8,9 @@ source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh -if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then - echo "SKIP: test is tailored for Mellanox Spectrum-2" +if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \ + "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then + echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3" exit 1 fi -- cgit v1.2.3 From ca7dc2791b507f842d73e46b1a0453b36b69ffc2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 29 Feb 2020 15:11:09 -0800 Subject: bpftool: Add header guards to generated vmlinux.h Add canonical #ifndef/#define/#endif guard for generated vmlinux.h header with __VMLINUX_H__ symbol. __VMLINUX_H__ is also going to play double role of identifying whether vmlinux.h is being used, versus, say, BCC or non-CO-RE libbpf modes with dependency on kernel headers. This will make it possible to write helper macro/functions, agnostic to exact BPF program set up. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200229231112.1240137-2-andriin@fb.com --- tools/bpf/bpftool/btf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index b3745ed711ba..bcaf55b59498 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef __VMLINUX_H__\n"); + printf("#define __VMLINUX_H__\n"); + printf("\n"); printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); @@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute pop\n"); printf("#endif\n"); + printf("\n"); + printf("#endif /* __VMLINUX_H__ */\n"); done: btf_dump__free(d); -- cgit v1.2.3 From fd56e0058412fb542db0e9556f425747cf3f8366 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 29 Feb 2020 15:11:10 -0800 Subject: libbpf: Fix use of PT_REGS_PARM macros with vmlinux.h Add detection of vmlinux.h to bpf_tracing.h header for PT_REGS macro. Currently, BPF applications have to define __KERNEL__ symbol to use correct definition of struct pt_regs on x86 arch. This is due to different field names under internal kernel vs UAPI conditions. To make this more transparent for users, detect vmlinux.h by checking __VMLINUX_H__ symbol. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200229231112.1240137-3-andriin@fb.com --- tools/lib/bpf/bpf_tracing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index b0dafe8b4ebc..8376f22b0e36 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -49,7 +49,7 @@ #if defined(bpf_target_x86) -#ifdef __KERNEL__ +#if defined(__KERNEL__) || defined(__VMLINUX_H__) #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) -- cgit v1.2.3 From 396f544ed5e5a9c40de5663b774f643644cba059 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 29 Feb 2020 15:11:11 -0800 Subject: selftests/bpf: Fix BPF_KRETPROBE macro and use it in attach_probe test For kretprobes, there is no point in capturing input arguments from pt_regs, as they are going to be, most probably, clobbered by the time probed kernel function returns. So switch BPF_KRETPROBE to accept zero or one argument (optional return result). Fixes: ac065870d928 ("selftests/bpf: Add BPF_PROG, BPF_KPROBE, and BPF_KRETPROBE macros") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200229231112.1240137-4-andriin@fb.com --- tools/testing/selftests/bpf/bpf_trace_helpers.h | 13 +++++++------ tools/testing/selftests/bpf/progs/test_attach_probe.c | 3 ++- tools/testing/selftests/bpf/progs/test_overhead.c | 6 ++---- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h index c6f1354d93fb..83b8e02f5ee9 100644 --- a/tools/testing/selftests/bpf/bpf_trace_helpers.h +++ b/tools/testing/selftests/bpf/bpf_trace_helpers.h @@ -96,15 +96,16 @@ typeof(name(0)) name(struct pt_regs *ctx) \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_argsN(x, args...) \ - ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) +#define ___bpf_kretprobe_args1(x) \ + ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) #define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* - * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all - * input kprobe arguments, one last extra argument has to be specified, which - * captures kprobe return value. + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional + * return value (in addition to `struct pt_regs *ctx`), but no input + * arguments, because they will be clobbered by the time probed function + * returns. */ #define BPF_KRETPROBE(name, args...) \ name(struct pt_regs *ctx); \ diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index dd8fae6660ab..38ed8c3bf922 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -4,6 +4,7 @@ #include #include #include +#include "bpf_trace_helpers.h" int kprobe_res = 0; int kretprobe_res = 0; @@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx) } SEC("kretprobe/sys_nanosleep") -int handle_kretprobe(struct pt_regs *ctx) +int BPF_KRETPROBE(handle_kretprobe) { kretprobe_res = 2; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index bfe9fbcb9684..f43714c69cc8 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -17,11 +17,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) } SEC("kretprobe/__set_task_comm") -int BPF_KRETPROBE(prog2, - struct task_struct *tsk, const char *buf, bool exec, - int ret) +int BPF_KRETPROBE(prog2, int ret) { - return !PT_REGS_PARM1(ctx) && ret; + return ret; } SEC("raw_tp/task_rename") -- cgit v1.2.3 From df8ff35311c8d10d90b4604c02b32c361dc997aa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 29 Feb 2020 15:11:12 -0800 Subject: libbpf: Merge selftests' bpf_trace_helpers.h into libbpf's bpf_tracing.h Move BPF_PROG, BPF_KPROBE, and BPF_KRETPROBE macro into libbpf's bpf_tracing.h header to make it available for non-selftests users. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200229231112.1240137-5-andriin@fb.com --- tools/lib/bpf/bpf_tracing.h | 118 ++++++++++++++++++++ tools/testing/selftests/bpf/bpf_tcp_helpers.h | 2 +- tools/testing/selftests/bpf/bpf_trace_helpers.h | 121 --------------------- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 2 +- tools/testing/selftests/bpf/progs/fentry_test.c | 2 +- tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 2 +- .../selftests/bpf/progs/fexit_bpf2bpf_simple.c | 2 +- tools/testing/selftests/bpf/progs/fexit_test.c | 2 +- tools/testing/selftests/bpf/progs/kfree_skb.c | 2 +- .../selftests/bpf/progs/test_attach_probe.c | 2 +- tools/testing/selftests/bpf/progs/test_overhead.c | 1 - .../selftests/bpf/progs/test_perf_branches.c | 2 +- .../testing/selftests/bpf/progs/test_perf_buffer.c | 2 +- .../testing/selftests/bpf/progs/test_probe_user.c | 1 - .../selftests/bpf/progs/test_trampoline_count.c | 3 +- .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 +- 16 files changed, 131 insertions(+), 135 deletions(-) delete mode 100644 tools/testing/selftests/bpf/bpf_trace_helpers.h (limited to 'tools') diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 8376f22b0e36..379d03b211ea 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -192,4 +192,122 @@ struct pt_regs; (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif +#define ___bpf_concat(a, b) a ## b +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_empty(...) \ + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + +/* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and + * similar kinds of BPF programs, that accept input arguments as a single + * pointer to untyped u64 array, where each u64 can actually be a typed + * pointer or integer of different size. Instead of requring user to write + * manual casts and work with array elements by index, BPF_PROG macro + * allows user to declare a list of named and typed input arguments in the + * same syntax as for normal C function. All the casting is hidden and + * performed transparently, while user code can just assume working with + * function arguments of specified type and name. + * + * Original raw context argument is preserved as well as 'ctx' argument. + * This is useful when using BPF helpers that expect original context + * as one of the parameters (e.g., for bpf_perf_event_output()). + */ +#define BPF_PROG(name, args...) \ +name(unsigned long long *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_ctx_cast(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args) + +struct pt_regs; + +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) \ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific + * low-level way of getting kprobe input arguments from struct pt_regs, and + * provides a familiar typed and named function arguments syntax and + * semantics of accessing kprobe input paremeters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + */ +#define BPF_KPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) \ + ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional + * return value (in addition to `struct pt_regs *ctx`), but no input + * arguments, because they will be clobbered by the time probed function + * returns. + */ +#define BPF_KRETPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kretprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) + #endif diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 8f21965ffc6c..5bf2fe9b1efa 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -6,7 +6,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h deleted file mode 100644 index 83b8e02f5ee9..000000000000 --- a/tools/testing/selftests/bpf/bpf_trace_helpers.h +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_TRACE_HELPERS_H -#define __BPF_TRACE_HELPERS_H - -#include - -#define ___bpf_concat(a, b) a ## b -#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) -#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define ___bpf_empty(...) \ - ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) - -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] -#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] -#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] -#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) - -/* - * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and - * similar kinds of BPF programs, that accept input arguments as a single - * pointer to untyped u64 array, where each u64 can actually be a typed - * pointer or integer of different size. Instead of requring user to write - * manual casts and work with array elements by index, BPF_PROG macro - * allows user to declare a list of named and typed input arguments in the - * same syntax as for normal C function. All the casting is hidden and - * performed transparently, while user code can just assume working with - * function arguments of specified type and name. - * - * Original raw context argument is preserved as well as 'ctx' argument. - * This is useful when using BPF helpers that expect original context - * as one of the parameters (e.g., for bpf_perf_event_output()). - */ -#define BPF_PROG(name, args...) \ -name(unsigned long long *ctx); \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args); \ -typeof(name(0)) name(unsigned long long *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_ctx_cast(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args) - -struct pt_regs; - -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) - -/* - * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for - * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific - * low-level way of getting kprobe input arguments from struct pt_regs, and - * provides a familiar typed and named function arguments syntax and - * semantics of accessing kprobe input paremeters. - * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might - * be necessary when using BPF helpers like bpf_perf_event_output(). - */ -#define BPF_KPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) - -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) \ - ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) -#define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) - -/* - * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional - * return value (in addition to `struct pt_regs *ctx`), but no input - * arguments, because they will be clobbered by the time probed function - * returns. - */ -#define BPF_KRETPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kretprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) -#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index b631fb5032d2..127ea762a062 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -9,7 +9,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 38d3a82144ca..9365b686f84b 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include #include -#include "bpf_trace_helpers.h" +#include char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index c329fccf9842..98e1efe14549 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -5,7 +5,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c index 92f3fa47cf40..85c0b516d6ee 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include #include -#include "bpf_trace_helpers.h" +#include struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index 348109b9ea07..bd1e17d8024c 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include #include -#include "bpf_trace_helpers.h" +#include char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 8f48a909f079..a46a264ce24e 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -4,7 +4,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include char _license[] SEC("license") = "GPL"; struct { diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 38ed8c3bf922..8056a4c6d918 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -4,7 +4,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include int kprobe_res = 0; int kretprobe_res = 0; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index f43714c69cc8..56a50b25cd33 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -6,7 +6,6 @@ #include #include #include -#include "bpf_trace_helpers.h" struct task_struct; diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c index 0f7e27d97567..a1ccc831c882 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_branches.c +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c @@ -5,7 +5,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include int valid = 0; int required_size_out = 0; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index ebfcc9f50c35..ad59c4c9aba8 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -4,7 +4,7 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index d556b1572cc6..89b3532ccc75 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,7 +7,6 @@ #include #include -#include "bpf_trace_helpers.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c index e51e6e3a81c2..f030e469d05b 100644 --- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -2,7 +2,8 @@ #include #include #include -#include "bpf_trace_helpers.h" +#include +#include struct task_struct; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index b840fc9e3ed5..42dd2fedd588 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include -#include "bpf_trace_helpers.h" struct net_device { /* Structure does not need to contain all entries, -- cgit v1.2.3 From 775a2be52da1c55fc810a5d151049f86f0fd5362 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 2 Mar 2020 15:53:48 +0100 Subject: selftests/bpf: Declare bpf_log_buf variables as static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cgroup selftests did not declare the bpf_log_buf variable as static, leading to a linker error with GCC 10 (which defaults to -fno-common). Fix this by adding the missing static declarations. Fixes: 257c88559f36 ("selftests/bpf: Convert test_cgroup_attach to prog_tests") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20200302145348.559177-1-toke@redhat.com --- tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 5b13f2c6c402..70e94e783070 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 2ff21dbce179..139f8e82c7c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int map_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9d8cb48b99de..9e96f8d87fea 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -8,7 +8,7 @@ #define BAR "/foo/bar/" #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(int verdict) { -- cgit v1.2.3 From c016b68edc7a2adf3db0f11fb649797c1f9216ea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 20:31:58 -0800 Subject: libbpf: Add bpf_link pinning/unpinning With bpf_link abstraction supported by kernel explicitly, add pinning/unpinning API for links. Also allow to create (open) bpf_link from BPF FS file. This API allows to have an "ephemeral" FD-based BPF links (like raw tracepoint or fexit/freplace attachments) surviving user process exit, by pinning them in a BPF FS, which is an important use case for long-running BPF programs. As part of this, expose underlying FD for bpf_link. While legacy bpf_link's might not have a FD associated with them (which will be expressed as a bpf_link with fd=-1), kernel's abstraction is based around FD-based usage, so match it closely. This, subsequently, allows to have a generic pinning/unpinning API for generalized bpf_link. For some types of bpf_links kernel might not support pinning, in which case bpf_link__pin() will return error. With FD being part of generic bpf_link, also get rid of bpf_link_fd in favor of using vanialla bpf_link. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303043159.323675-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 131 +++++++++++++++++++++++++++++++++++++---------- tools/lib/bpf/libbpf.h | 5 ++ tools/lib/bpf/libbpf.map | 5 ++ 3 files changed, 114 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 996162801f7a..f8c4042e5855 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6931,6 +6931,8 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_link { int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ bool disconnected; }; @@ -6960,26 +6962,109 @@ int bpf_link__destroy(struct bpf_link *link) err = link->detach(link); if (link->destroy) link->destroy(link); + if (link->pin_path) + free(link->pin_path); free(link); return err; } -struct bpf_link_fd { - struct bpf_link link; /* has to be at the top of struct */ - int fd; /* hook FD */ -}; +int bpf_link__fd(const struct bpf_link *link) +{ + return link->fd; +} + +const char *bpf_link__pin_path(const struct bpf_link *link) +{ + return link->pin_path; +} + +static int bpf_link__detach_fd(struct bpf_link *link) +{ + return close(link->fd); +} + +struct bpf_link *bpf_link__open(const char *path) +{ + struct bpf_link *link; + int fd; + + fd = bpf_obj_get(path); + if (fd < 0) { + fd = -errno; + pr_warn("failed to open link at %s: %d\n", path, fd); + return ERR_PTR(fd); + } + + link = calloc(1, sizeof(*link)); + if (!link) { + close(fd); + return ERR_PTR(-ENOMEM); + } + link->detach = &bpf_link__detach_fd; + link->fd = fd; + + link->pin_path = strdup(path); + if (!link->pin_path) { + bpf_link__destroy(link); + return ERR_PTR(-ENOMEM); + } + + return link; +} + +int bpf_link__pin(struct bpf_link *link, const char *path) +{ + int err; + + if (link->pin_path) + return -EBUSY; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); + if (err) + return err; + + link->pin_path = strdup(path); + if (!link->pin_path) + return -ENOMEM; + + if (bpf_obj_pin(link->fd, link->pin_path)) { + err = -errno; + zfree(&link->pin_path); + return err; + } + + pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); + return 0; +} + +int bpf_link__unpin(struct bpf_link *link) +{ + int err; + + if (!link->pin_path) + return -EINVAL; + + err = unlink(link->pin_path); + if (err != 0) + return -errno; + + pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); + zfree(&link->pin_path); + return 0; +} static int bpf_link__detach_perf_event(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; int err; - err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); + err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0); if (err) err = -errno; - close(l->fd); + close(link->fd); return err; } @@ -6987,7 +7072,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, err; if (pfd < 0) { @@ -7005,7 +7090,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_perf_event; + link->detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -7024,7 +7109,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } - return (struct bpf_link *)link; + return link; } /* @@ -7312,18 +7397,11 @@ out: return link; } -static int bpf_link__detach_fd(struct bpf_link *link) -{ - struct bpf_link_fd *l = (void *)link; - - return close(l->fd); -} - struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7336,7 +7414,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -7348,7 +7426,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, @@ -7362,7 +7440,7 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7375,7 +7453,7 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -7409,10 +7487,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog) static int bpf_link__detach_struct_ops(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; __u32 zero = 0; - if (bpf_map_delete_elem(l->fd, &zero)) + if (bpf_map_delete_elem(link->fd, &zero)) return -errno; return 0; @@ -7421,7 +7498,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) { struct bpf_struct_ops *st_ops; - struct bpf_link_fd *link; + struct bpf_link *link; __u32 i, zero = 0; int err; @@ -7453,10 +7530,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) return ERR_PTR(err); } - link->link.detach = bpf_link__detach_struct_ops; + link->detach = bpf_link__detach_struct_ops; link->fd = map->fd; - return (struct bpf_link *)link; + return link; } enum bpf_perf_event_ret diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 02fc58a21a7f..d38d7a629417 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -219,6 +219,11 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API struct bpf_link *bpf_link__open(const char *path); +LIBBPF_API int bpf_link__fd(const struct bpf_link *link); +LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); +LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7b014c8cdece..5129283c0284 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -238,5 +238,10 @@ LIBBPF_0.0.7 { LIBBPF_0.0.8 { global: + bpf_link__fd; + bpf_link__open; + bpf_link__pin; + bpf_link__pin_path; + bpf_link__unpin; bpf_program__set_attach_target; } LIBBPF_0.0.7; -- cgit v1.2.3 From 6489b8e1e3cf0eb8639e96610002837c53a677cd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 20:31:59 -0800 Subject: selftests/bpf: Add link pinning selftests Add selftests validating link pinning/unpinning and associated BPF link (attachment) lifetime. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303043159.323675-4-andriin@fb.com --- .../selftests/bpf/prog_tests/link_pinning.c | 105 +++++++++++++++++++++ .../selftests/bpf/progs/test_link_pinning.c | 25 +++++ 2 files changed, 130 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/link_pinning.c create mode 100644 tools/testing/selftests/bpf/progs/test_link_pinning.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c new file mode 100644 index 000000000000..a743288cf384 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include + +#include "test_link_pinning.skel.h" + +static int duration = 0; + +void test_link_pinning_subtest(struct bpf_program *prog, + struct test_link_pinning__bss *bss) +{ + const char *link_pin_path = "/sys/fs/bpf/pinned_link_test"; + struct stat statbuf = {}; + struct bpf_link *link; + int err, i; + + link = bpf_program__attach(prog); + if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + bss->in = 1; + usleep(1); + CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out); + + /* pin link */ + err = bpf_link__pin(link, link_pin_path); + if (CHECK(err, "link_pin", "err: %d\n", err)) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* check that link was pinned */ + err = stat(link_pin_path, &statbuf); + if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss->in = 2; + usleep(1); + CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out); + + /* destroy link, pinned link should keep program attached */ + bpf_link__destroy(link); + link = NULL; + + bss->in = 3; + usleep(1); + CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out); + + /* re-open link from BPFFS */ + link = bpf_link__open(link_pin_path); + if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* unpin link from BPFFS, program still attached */ + err = bpf_link__unpin(link); + if (CHECK(err, "link_unpin", "err: %d\n", err)) + goto cleanup; + + /* still active, as we have FD open now */ + bss->in = 4; + usleep(1); + CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out); + + bpf_link__destroy(link); + link = NULL; + + /* Validate it's finally detached. + * Actual detachment might get delayed a bit, so there is no reliable + * way to validate it immediately here, let's count up for long enough + * and see if eventually output stops being updated + */ + for (i = 5; i < 10000; i++) { + bss->in = i; + usleep(1); + if (bss->out == i - 1) + break; + } + CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i); + +cleanup: + if (!IS_ERR(link)) + bpf_link__destroy(link); +} + +void test_link_pinning(void) +{ + struct test_link_pinning* skel; + + skel = test_link_pinning__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + if (test__start_subtest("pin_raw_tp")) + test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss); + if (test__start_subtest("pin_tp_btf")) + test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss); + + test_link_pinning__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c new file mode 100644 index 000000000000..bbf2a5264dc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include + +int in = 0; +int out = 0; + +SEC("raw_tp/sys_enter") +int raw_tp_prog(const void *ctx) +{ + out = in; + return 0; +} + +SEC("tp_btf/sys_enter") +int tp_btf_prog(const void *ctx) +{ + out = in; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 306b69dce9269df116bfa94319b1b827c22d5794 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Mar 2020 20:24:40 +0900 Subject: bootconfig: Support O= option Support O= option to build bootconfig tool in the other directory. As same as other tools, if you specify O=, bootconfig command is build under . Link: http://lkml.kernel.org/r/158323468033.10560.14661631369326294355.stgit@devnote2 Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/Makefile | 27 +++++++++++++++++---------- tools/bootconfig/test-bootconfig.sh | 14 ++++++++++---- 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile index a6146ac64458..da5975775337 100644 --- a/tools/bootconfig/Makefile +++ b/tools/bootconfig/Makefile @@ -1,23 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for bootconfig command +include ../scripts/Makefile.include bindir ?= /usr/bin -HEADER = include/linux/bootconfig.h -CFLAGS = -Wall -g -I./include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif -PROGS = bootconfig +LIBSRC = $(srctree)/lib/bootconfig.c $(srctree)/include/linux/bootconfig.h +CFLAGS = -Wall -g -I$(CURDIR)/include -all: $(PROGS) +ALL_TARGETS := bootconfig +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) -bootconfig: ../../lib/bootconfig.c main.c $(HEADER) +all: $(ALL_PROGRAMS) + +$(OUTPUT)bootconfig: main.c $(LIBSRC) $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ -install: $(PROGS) - install bootconfig $(DESTDIR)$(bindir) +test: $(ALL_PROGRAMS) test-bootconfig.sh + ./test-bootconfig.sh $(OUTPUT) -test: bootconfig - ./test-bootconfig.sh +install: $(ALL_PROGRAMS) + install $(OUTPUT)bootconfig $(DESTDIR)$(bindir) clean: - $(RM) -f *.o bootconfig + $(RM) -f $(OUTPUT)*.o $(ALL_PROGRAMS) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 1411f4c3454f..81b350ffd03f 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -3,9 +3,16 @@ echo "Boot config test script" -BOOTCONF=./bootconfig -INITRD=`mktemp initrd-XXXX` -TEMPCONF=`mktemp temp-XXXX.bconf` +if [ -d "$1" ]; then + TESTDIR=$1 +else + TESTDIR=. +fi +BOOTCONF=${TESTDIR}/bootconfig + +INITRD=`mktemp ${TESTDIR}/initrd-XXXX` +TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf` +OUTFILE=`mktemp ${TESTDIR}/tempout-XXXX` NG=0 cleanup() { @@ -65,7 +72,6 @@ new_size=$(stat -c %s $INITRD) xpass test $new_size -eq $initrd_size echo "No error messge while applying" -OUTFILE=`mktemp tempout-XXXX` dd if=/dev/zero of=$INITRD bs=4096 count=1 printf " \0\0\0 \0\0\0" >> $INITRD $BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1 -- cgit v1.2.3 From 89b74cac7834734d6b2733204c639917d3826083 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Mar 2020 20:24:50 +0900 Subject: tools/bootconfig: Show line and column in parse error Show line and column when we got a parse error in bootconfig tool. Current lib/bootconfig shows the parse error with byte offset, but that is not human readable. This makes xbc_init() not showing error message itself but able to pass the error message and position to caller, so that the caller can decode it and show the error message with line number and columns. With this patch, bootconfig tool shows an error with line:column as below. $ cat samples/bad-dotword.bconf # do not start keyword with . key { .word = 1 } $ ./bootconfig -a samples/bad-dotword.bconf initrd Parse Error: Invalid keyword at 3:3 Link: http://lkml.kernel.org/r/158323469002.10560.4023923847704522760.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 ++- init/main.c | 14 ++++++++++---- lib/bootconfig.c | 35 ++++++++++++++++++++++++++--------- tools/bootconfig/main.c | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 69 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index d11e183fcb54..9903088891fa 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -216,7 +216,8 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf); +int __init xbc_init(char *buf, const char **emsg, int *epos); + /* XBC cleanup data structures */ void __init xbc_destroy_all(void); diff --git a/init/main.c b/init/main.c index ee4947af823f..e488213857e2 100644 --- a/init/main.c +++ b/init/main.c @@ -353,6 +353,8 @@ static int __init bootconfig_params(char *param, char *val, static void __init setup_boot_config(const char *cmdline) { static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; + const char *msg; + int pos; u32 size, csum; char *data, *copy; u32 *hdr; @@ -400,10 +402,14 @@ static void __init setup_boot_config(const char *cmdline) memcpy(copy, data, size); copy[size] = '\0'; - ret = xbc_init(copy); - if (ret < 0) - pr_err("Failed to parse bootconfig\n"); - else { + ret = xbc_init(copy, &msg, &pos); + if (ret < 0) { + if (pos < 0) + pr_err("Failed to init bootconfig: %s.\n", msg); + else + pr_err("Failed to parse bootconfig: %s at %d.\n", + msg, pos); + } else { pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); diff --git a/lib/bootconfig.c b/lib/bootconfig.c index ec3ce7fd299f..912ef4921398 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -29,12 +29,14 @@ static int xbc_node_num __initdata; static char *xbc_data __initdata; static size_t xbc_data_size __initdata; static struct xbc_node *last_parent __initdata; +static const char *xbc_err_msg __initdata; +static int xbc_err_pos __initdata; static int __init xbc_parse_error(const char *msg, const char *p) { - int pos = p - xbc_data; + xbc_err_msg = msg; + xbc_err_pos = (int)(p - xbc_data); - pr_err("Parse error at pos %d: %s\n", pos, msg); return -EINVAL; } @@ -738,33 +740,44 @@ void __init xbc_destroy_all(void) /** * xbc_init() - Parse given XBC file and build XBC internal tree * @buf: boot config text + * @emsg: A pointer of const char * to store the error message + * @epos: A pointer of int to store the error position * * This parses the boot config text in @buf. @buf must be a * null terminated string and smaller than XBC_DATA_MAX. * Return the number of stored nodes (>0) if succeeded, or -errno * if there is any error. + * In error cases, @emsg will be updated with an error message and + * @epos will be updated with the error position which is the byte offset + * of @buf. If the error is not a parser error, @epos will be -1. */ -int __init xbc_init(char *buf) +int __init xbc_init(char *buf, const char **emsg, int *epos) { char *p, *q; int ret, c; + if (epos) + *epos = -1; + if (xbc_data) { - pr_err("Error: bootconfig is already initialized.\n"); + if (emsg) + *emsg = "Bootconfig is already initialized"; return -EBUSY; } ret = strlen(buf); if (ret > XBC_DATA_MAX - 1 || ret == 0) { - pr_err("Error: Config data is %s.\n", - ret ? "too big" : "empty"); + if (emsg) + *emsg = ret ? "Config data is too big" : + "Config data is empty"; return -ERANGE; } xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX, SMP_CACHE_BYTES); if (!xbc_nodes) { - pr_err("Failed to allocate memory for bootconfig nodes.\n"); + if (emsg) + *emsg = "Failed to allocate bootconfig nodes"; return -ENOMEM; } memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); @@ -814,9 +827,13 @@ int __init xbc_init(char *buf) if (!ret) ret = xbc_verify_tree(); - if (ret < 0) + if (ret < 0) { + if (epos) + *epos = xbc_err_pos; + if (emsg) + *emsg = xbc_err_msg; xbc_destroy_all(); - else + } else ret = xbc_node_num; return ret; diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index a9b97814d1a9..16b9a420e6fd 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -130,6 +130,7 @@ int load_xbc_from_initrd(int fd, char **buf) int ret; u32 size = 0, csum = 0, rcsum; char magic[BOOTCONFIG_MAGIC_LEN]; + const char *msg; ret = fstat(fd, &stat); if (ret < 0) @@ -182,10 +183,12 @@ int load_xbc_from_initrd(int fd, char **buf) return -EINVAL; } - ret = xbc_init(*buf); + ret = xbc_init(*buf, &msg, NULL); /* Wrong data */ - if (ret < 0) + if (ret < 0) { + pr_err("parse error: %s.\n", msg); return ret; + } return size; } @@ -244,11 +247,34 @@ int delete_xbc(const char *path) return ret; } +static void show_xbc_error(const char *data, const char *msg, int pos) +{ + int lin = 1, col, i; + + if (pos < 0) { + pr_err("Error: %s.\n", msg); + return; + } + + /* Note that pos starts from 0 but lin and col should start from 1. */ + col = pos + 1; + for (i = 0; i < pos; i++) { + if (data[i] == '\n') { + lin++; + col = pos - i; + } + } + pr_err("Parse Error: %s at %d:%d\n", msg, lin, col); + +} + int apply_xbc(const char *path, const char *xbc_path) { u32 size, csum; char *buf, *data; int ret, fd; + const char *msg; + int pos; ret = load_xbc_file(xbc_path, &buf); if (ret < 0) { @@ -267,11 +293,12 @@ int apply_xbc(const char *path, const char *xbc_path) *(u32 *)(data + size + 4) = csum; /* Check the data format */ - ret = xbc_init(buf); + ret = xbc_init(buf, &msg, &pos); if (ret < 0) { - pr_err("Failed to parse %s: %d\n", xbc_path, ret); + show_xbc_error(data, msg, pos); free(data); free(buf); + return ret; } printf("Apply %s to %s\n", xbc_path, path); -- cgit v1.2.3 From b0ac4941aa2a249bbb06de86110cd9e2e53980ca Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 3 Mar 2020 15:05:02 -0500 Subject: bpf: Sync uapi bpf.h to tools/ sync tools/include/uapi/linux/bpf.h to match include/uapi/linux/bpf.h Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303200503.226217-3-willemdebruijn.kernel@gmail.com --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 906e9f2752db..7c689f4552dd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3176,6 +3176,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 62511ceadf6e217f09d4ab1f9198d2bb5cc70e7c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 3 Mar 2020 15:05:03 -0500 Subject: selftests/bpf: Test new __sk_buff field gso_size Analogous to the gso_segs selftests introduced in commit d9ff286a0f59 ("bpf: allow BPF programs access skb_shared_info->gso_segs field"). Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303200503.226217-4-willemdebruijn.kernel@gmail.com --- tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 1 + tools/testing/selftests/bpf/progs/test_skb_ctx.c | 2 + tools/testing/selftests/bpf/verifier/ctx_skb.c | 47 ++++++++++++++++++++++++ 3 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index c6d6b685a946..4538bd08203f 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -14,6 +14,7 @@ void test_skb_ctx(void) .wire_len = 100, .gso_segs = 8, .mark = 9, + .gso_size = 10, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 202de3938494..b02ea589ce7e 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -23,6 +23,8 @@ int process(struct __sk_buff *skb) return 1; if (skb->gso_segs != 8) return 1; + if (skb->gso_size != 10) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index d438193804b2..2e16b8e268f2 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1010,6 +1010,53 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write gso_size from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=176 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CLS", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, { "check wire_len is not readable by sockets", .insns = { -- cgit v1.2.3 From 320a36063e1441210106aa33997ad3770d4c86b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 3 Mar 2020 10:08:00 -0800 Subject: libbpf: Fix handling of optional field_name in btf_dump__emit_type_decl Internal functions, used by btf_dump__emit_type_decl(), assume field_name is never going to be NULL. Ensure it's always the case. Fixes: 9f81654eebe8 ("libbpf: Expose BTF-to-C type declaration emitting API") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303180800.3303471-1-andriin@fb.com --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index bd09ed1710f1..dc451e4de5ad 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) return -EINVAL; - fname = OPTS_GET(opts, field_name, NULL); + fname = OPTS_GET(opts, field_name, ""); lvl = OPTS_GET(opts, indent_level, 0); btf_dump_emit_type_decl(d, id, fname, lvl); return 0; -- cgit v1.2.3 From 84ea1f8541721c1852bc95f4d50a603c661eabc6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Mar 2020 19:56:02 +0200 Subject: selftests: forwarding: lib: Add tc_rule_handle_stats_get() The function tc_rule_stats_get() fetches a given statistic of a TC rule given the rule preference. Another common way to reference a rule is using its handle. Introduce a dual to the aforementioned function that gets a statistic given rule handle. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 83fd15e3e545..de57e8887a7c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -626,6 +626,17 @@ tc_rule_stats_get() | jq ".[1].options.actions[].stats$selector" } +tc_rule_handle_stats_get() +{ + local id=$1; shift + local handle=$1; shift + local selector=${1:-.packets}; shift + + tc -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats$selector" +} + ethtool_stats_get() { local dev=$1; shift -- cgit v1.2.3 From 844f0556546900a658b241e5aea7b8dc7cb3ff72 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Mar 2020 19:56:03 +0200 Subject: selftests: forwarding: Convert until_counter_is() to take expression until_counter_is() currently takes as an argument a number and the condition holds when the current counter value is >= that number. Make the function more generic by taking a partial expression instead of just the number. Convert the two existing users. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 6 +++--- tools/testing/selftests/net/forwarding/lib.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index ebf7752f6d93..8f833678ac4d 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -351,7 +351,7 @@ build_backlog() local i=0 while :; do - local cur=$(busywait 1100 until_counter_is $((cur + 1)) \ + local cur=$(busywait 1100 until_counter_is "> $cur" \ get_qdisc_backlog $vlan) local diff=$((size - cur)) local pkts=$(((diff + 7999) / 8000)) @@ -481,14 +481,14 @@ do_mc_backlog_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc - qbl=$(busywait 5000 until_counter_is 500000 \ + qbl=$(busywait 5000 until_counter_is ">= 500000" \ get_qdisc_backlog $vlan) check_err $? "Could not build MC backlog" # Verify that we actually see the backlog on BUM TC. Do a busywait as # well, performance blips might cause false fail. local ebl - ebl=$(busywait 5000 until_counter_is 500000 \ + ebl=$(busywait 5000 until_counter_is ">= 500000" \ get_mc_transmit_queue $vlan) check_err $? "MC backlog reported by qdisc not visible in ethtool" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index de57e8887a7c..7ecce65d08f9 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -277,11 +277,11 @@ wait_for_offload() until_counter_is() { - local value=$1; shift + local expr=$1; shift local current=$("$@") echo $((current)) - ((current >= value)) + ((current $expr)) } busywait_for_counter() @@ -290,7 +290,7 @@ busywait_for_counter() local delta=$1; shift local base=$("$@") - busywait "$timeout" until_counter_is $((base + delta)) "$@" + busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } setup_wait_dev() -- cgit v1.2.3 From 47b0e096a938b020742b4f12afdc17aff21af6ca Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Mar 2020 19:56:04 +0200 Subject: selftests: forwarding: tc_common: Convert to use busywait A function busywait() was recently added based on the logic in __tc_check_packets(). Convert the code in tc_common to use the new function. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/tc_common.sh | 32 +++------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 64f652633585..0e18e8be6e2a 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -6,39 +6,14 @@ CHECK_TC="yes" # Can be overridden by the configuration file. See lib.sh TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms -__tc_check_packets() -{ - local id=$1 - local handle=$2 - local count=$3 - local operator=$4 - - start_time="$(date -u +%s%3N)" - while true - do - cmd_jq "tc -j -s filter show $id" \ - ".[] | select(.options.handle == $handle) | \ - select(.options.actions[0].stats.packets $operator $count)" \ - &> /dev/null - ret=$? - if [[ $ret -eq 0 ]]; then - return $ret - fi - current_time="$(date -u +%s%3N)" - diff=$(expr $current_time - $start_time) - if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then - return 1 - fi - done -} - tc_check_packets() { local id=$1 local handle=$2 local count=$3 - __tc_check_packets "$id" "$handle" "$count" "==" + busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } tc_check_packets_hitting() @@ -46,5 +21,6 @@ tc_check_packets_hitting() local id=$1 local handle=$2 - __tc_check_packets "$id" "$handle" 0 ">" + busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } -- cgit v1.2.3 From 7b522ba27636ccd86beb8afe71c6ee731cf7b718 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Mar 2020 19:56:05 +0200 Subject: selftests: mlxsw: qos_defprio: Use until_counter_is Instead of hand-coding the busywait() predicate, use the until_counter_is() introduced recently. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/qos_defprio.sh | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh index eff6393ce974..71066bc4b886 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh @@ -114,23 +114,12 @@ ping_ipv4() ping_test $h1 192.0.2.2 } -wait_for_packets() -{ - local t0=$1; shift - local prio_observe=$1; shift - - local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) - local delta=$((t1 - t0)) - echo $delta - ((delta >= 10)) -} - __test_defprio() { local prio_install=$1; shift local prio_observe=$1; shift - local delta local key + local t1 local i RET=0 @@ -139,9 +128,10 @@ __test_defprio() local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) mausezahn -q $h1 -d 100m -c 10 -t arp reply - delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe) + t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \ + ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) - check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta." + check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))." log_test "Default priority $prio_install/$prio_observe" defprio_uninstall $swp1 $prio_install -- cgit v1.2.3 From c395c3553d6870541f1c283479aea2a6f26364d5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sun, 23 Feb 2020 11:34:49 -0800 Subject: perf diff: Fix undefined string comparison spotted by clang's -Wstring-compare clang warns: util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/map.c:434:15: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if (srcline != SRCLINE_UNKNOWN) ^ ~~~~~~~~~~~~~~~ Reviewer Notes: Looks good to me. Some more context: https://clang.llvm.org/docs/DiagnosticsReference.html#wstring-compare The spec says: J.1 Unspecified behavior The following are unspecified: .. Whether two string literals result in distinct arrays (6.4.5). Signed-off-by: Nick Desaulniers Reviewed-by: Ian Rogers Cc: Alexander Shishkin Cc: Changbin Du Cc: Jin Yao Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/900 Link: http://lore.kernel.org/lkml/20200223193456.25291-1-nick.desaulniers@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- tools/perf/util/block-info.c | 3 ++- tools/perf/util/map.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8b6ae557d8b..c03c36fde7e2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1312,7 +1312,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", start_line, end_line, block_he->diff.cycles); } else { diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index c4b030bf6ec2..fbbb6d640dad 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -295,7 +295,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s]", start_line, end_line); } else { diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index a08ca276098e..95428511300d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -431,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { char *srcline = map__srcline(map, addr, NULL); - if (srcline != SRCLINE_UNKNOWN) + if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); } -- cgit v1.2.3 From 7982a898515064ba180d958bfc89ed22d13905ee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 27 Jan 2020 19:00:31 +0900 Subject: tools lib api fs: Move cgroupsfs_find_mountpoint() Move it from tools/perf/util/cgroup.c as it can be used by other places. Note that cgroup filesystem is different from others since it's usually mounted separately (in v1) for each subsystem. I just copied the code with a little modification to pass a name of subsystem. Suggested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200127100031.1368732-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/Build | 1 + tools/lib/api/fs/cgroup.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/api/fs/fs.h | 2 ++ tools/perf/util/cgroup.c | 63 ++------------------------------------------ 4 files changed, 72 insertions(+), 61 deletions(-) create mode 100644 tools/lib/api/fs/cgroup.c (limited to 'tools') diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build index f4ed9629ae85..0f75b28654de 100644 --- a/tools/lib/api/fs/Build +++ b/tools/lib/api/fs/Build @@ -1,2 +1,3 @@ libapi-y += fs.o libapi-y += tracing_path.o +libapi-y += cgroup.o diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c new file mode 100644 index 000000000000..889a6eb4aaca --- /dev/null +++ b/tools/lib/api/fs/cgroup.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include "fs.h" + +int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) +{ + FILE *fp; + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; + char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; + char *token, *saved_ptr = NULL; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return -1; + + /* + * in order to handle split hierarchy, we need to scan /proc/mounts + * and inspect every cgroupfs mount point to find one that has + * perf_event subsystem + */ + path_v1[0] = '\0'; + path_v2[0] = '\0'; + + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" + __stringify(PATH_MAX)"s %*d %*d\n", + mountpoint, type, tokens) == 3) { + + if (!path_v1[0] && !strcmp(type, "cgroup")) { + + token = strtok_r(tokens, ",", &saved_ptr); + + while (token != NULL) { + if (subsys && !strcmp(token, subsys)) { + strcpy(path_v1, mountpoint); + break; + } + token = strtok_r(NULL, ",", &saved_ptr); + } + } + + if (!path_v2[0] && !strcmp(type, "cgroup2")) + strcpy(path_v2, mountpoint); + + if (path_v1[0] && path_v2[0]) + break; + } + fclose(fp); + + if (path_v1[0]) + path = path_v1; + else if (path_v2[0]) + path = path_v2; + else + return -1; + + if (strlen(path) < maxlen) { + strcpy(buf, path); + return 0; + } + return -1; +} diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 92d03b8396b1..936edb95e1f3 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -28,6 +28,8 @@ FS(bpf_fs) #undef FS +int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys); + int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_xll(const char *filename, unsigned long long *value); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 4881d4af3381..5bc9d3b01bd9 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,75 +3,16 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" -#include #include #include #include #include #include #include +#include int nr_cgroups; -static int -cgroupfs_find_mountpoint(char *buf, size_t maxlen) -{ - FILE *fp; - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; - char *token, *saved_ptr = NULL; - - fp = fopen("/proc/mounts", "r"); - if (!fp) - return -1; - - /* - * in order to handle split hierarchy, we need to scan /proc/mounts - * and inspect every cgroupfs mount point to find one that has - * perf_event subsystem - */ - path_v1[0] = '\0'; - path_v2[0] = '\0'; - - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" - __stringify(PATH_MAX)"s %*d %*d\n", - mountpoint, type, tokens) == 3) { - - if (!path_v1[0] && !strcmp(type, "cgroup")) { - - token = strtok_r(tokens, ",", &saved_ptr); - - while (token != NULL) { - if (!strcmp(token, "perf_event")) { - strcpy(path_v1, mountpoint); - break; - } - token = strtok_r(NULL, ",", &saved_ptr); - } - } - - if (!path_v2[0] && !strcmp(type, "cgroup2")) - strcpy(path_v2, mountpoint); - - if (path_v1[0] && path_v2[0]) - break; - } - fclose(fp); - - if (path_v1[0]) - path = path_v1; - else if (path_v2[0]) - path = path_v2; - else - return -1; - - if (strlen(path) < maxlen) { - strcpy(buf, path); - return 0; - } - return -1; -} - static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -79,7 +20,7 @@ static int open_cgroup(const char *name) int fd; - if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) return -1; scnprintf(path, PATH_MAX, "%s/%s", mnt, name); -- cgit v1.2.3 From 1af62ce61cd80a25590d5abeccfb5c3a666527dd Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 14 Feb 2020 16:04:52 +0800 Subject: perf stat: Show percore counts in per CPU output We have supported the event modifier "percore" which sums up the event counts for all hardware threads in a core and show the counts per core. For example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1 Performance counter stats for 'system wide': S0-D0-C0 395,072 cpu/event=cpu-cycles,percore/ S0-D0-C1 851,248 cpu/event=cpu-cycles,percore/ S0-D0-C2 954,226 cpu/event=cpu-cycles,percore/ S0-D0-C3 1,233,659 cpu/event=cpu-cycles,percore/ This patch provides a new option "--percore-show-thread". It is used with event modifier "percore" together to sum up the event counts for all hardware threads in a core but show the counts per hardware thread. This is essentially a replacement for the any bit (which is gone in Icelake). Per core counts are useful for some formulas, e.g. CoreIPC. The original percore version was inconvenient to post process. This variant matches the output of the any bit. With this patch, for example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1 Performance counter stats for 'system wide': CPU0 2,453,061 cpu/event=cpu-cycles,percore/ CPU1 1,823,921 cpu/event=cpu-cycles,percore/ CPU2 1,383,166 cpu/event=cpu-cycles,percore/ CPU3 1,102,652 cpu/event=cpu-cycles,percore/ CPU4 2,453,061 cpu/event=cpu-cycles,percore/ CPU5 1,823,921 cpu/event=cpu-cycles,percore/ CPU6 1,383,166 cpu/event=cpu-cycles,percore/ CPU7 1,102,652 cpu/event=cpu-cycles,percore/ We can see counts are duplicated in CPU pairs (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7). The interval mode also works. For example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -I 1000 # time CPU counts unit events 1.000425421 CPU0 925,032 cpu/event=cpu-cycles,percore/ 1.000425421 CPU1 430,202 cpu/event=cpu-cycles,percore/ 1.000425421 CPU2 436,843 cpu/event=cpu-cycles,percore/ 1.000425421 CPU3 1,192,504 cpu/event=cpu-cycles,percore/ 1.000425421 CPU4 925,032 cpu/event=cpu-cycles,percore/ 1.000425421 CPU5 430,202 cpu/event=cpu-cycles,percore/ 1.000425421 CPU6 436,843 cpu/event=cpu-cycles,percore/ 1.000425421 CPU7 1,192,504 cpu/event=cpu-cycles,percore/ If we offline CPU5, the result is: # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1 Performance counter stats for 'system wide': CPU0 2,752,148 cpu/event=cpu-cycles,percore/ CPU1 1,009,312 cpu/event=cpu-cycles,percore/ CPU2 2,784,072 cpu/event=cpu-cycles,percore/ CPU3 2,427,922 cpu/event=cpu-cycles,percore/ CPU4 2,752,148 cpu/event=cpu-cycles,percore/ CPU6 2,784,072 cpu/event=cpu-cycles,percore/ CPU7 2,427,922 cpu/event=cpu-cycles,percore/ 1.001416041 seconds time elapsed v4: --- Ravi Bangoria reports an issue in v3. Once we offline a CPU, the output is not correct. The issue is we should use the cpu idx in print_percore_thread rather than using the cpu value. v3: --- 1. Fix the interval mode output error 2. Use cpu value (not cpu index) in config->aggr_get_id(). 3. Refine the code according to Jiri's comments. v2: --- Add the explanation in change log. This is essentially a replacement for the any bit. No code change. Signed-off-by: Jin Yao Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200214080452.26402-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 9 +++++++++ tools/perf/builtin-stat.c | 4 ++++ tools/perf/util/stat-display.c | 33 ++++++++++++++++++++++++++++----- tools/perf/util/stat.h | 1 + 4 files changed, 42 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9431b8066fb4..4d56586b2fb9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -334,6 +334,15 @@ Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--percore-show-thread:: +The event modifier "percore" has supported to sum up the event counts +for all hardware threads in a core and show the counts per core. + +This option with event modifier "percore" enabled also sums up the event +counts for all hardware threads in a core but show the sum counts per +hardware thread. This is essentially a replacement for the any bit and +convenient for post processing. + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a098c2ebf4ea..ec053dc1e35c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -929,6 +929,10 @@ static struct option stat_options[] = { OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, "Configure all used events to run in user space.", PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread, + "Use with 'percore' event qualifier to show the event " + "counts of one hardware thread by sum up total hardware " + "threads of same physical core"), OPT_END() }; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc31fccc0057..d89cb0da90f8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config, config->csv_sep); break; case AGGR_NONE: - if (evsel->percore) { + if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), @@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first) + bool *first, int cpu) { struct aggr_data ad; FILE *output = config->output; @@ -654,7 +654,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, + printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); @@ -687,7 +687,7 @@ static void print_aggr(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) fputc('\n', output); @@ -1146,6 +1146,26 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } +static void print_percore_thread(struct perf_stat_config *config, + struct evsel *counter, char *prefix) +{ + int s, s2, id; + bool first = true; + + for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + if (s2 == id) + break; + } + + print_counter_aggrdata(config, counter, s, + prefix, false, + &first, i); + } +} + static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1157,13 +1177,16 @@ static void print_percore(struct perf_stat_config *config, if (!(config->aggr_map || config->aggr_get_id)) return; + if (config->percore_show_thread) + return print_percore_thread(config, counter, prefix); + for (s = 0; s < config->aggr_map->nr; s++) { if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fb990efa54a8..b4fdfaa7f2c0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -109,6 +109,7 @@ struct perf_stat_config { bool walltime_run_table; bool all_kernel; bool all_user; + bool percore_show_thread; FILE *output; unsigned int interval; unsigned int timeout; -- cgit v1.2.3 From 357a5d24c4715d454f8acd6796e2adaaef0c48ed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Feb 2020 11:43:53 -0300 Subject: perf llvm: Add debug hint message about missing kernel-devel package To help in debugging, add this extra message: detect_kbuild_dir: Couldn't find "/lib/modules/5.4.20-200.fc31.x86_64/build/include/generated/autoconf.h", missing kernel-devel package?. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index b5af680fc667..dbdffb6673fe 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir) return -ENOMEM; return 0; } + pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", + __func__, autoconf_path); free(autoconf_path); return -ENOENT; } -- cgit v1.2.3 From dabce16bd2926b82ef1bef70acd8b24828e9448b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 4 Feb 2020 10:22:33 +0530 Subject: perf annotate: Get rid of annotation->nr_jumps The 'nr_jumps' field in 'struct annotation' is not used since it's inception in commit 2402e4a936a0 ("perf annotate browser: Show 'jumpy' functions"). Get rid of it. Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Ian Rogers Cc: Jin Yao Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200204045233.474937-7-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 -- tools/perf/util/annotate.h | 1 - 2 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0ea95be84b3b..f1ea0d61eb5b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2611,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (++al->jump_sources > notes->max_jump_sources) notes->max_jump_sources = al->jump_sources; - - ++notes->nr_jumps; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 001258601a37..07c775938d46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -279,7 +279,6 @@ struct annotation { struct annotation_options *options; struct annotation_line **offsets; int nr_events; - int nr_jumps; int max_jump_sources; int nr_entries; int nr_asm_entries; -- cgit v1.2.3 From 76ce02651dabee19df018097db4485800e05e177 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Thu, 27 Feb 2020 20:44:24 +0100 Subject: libperf: Add counting example Current libperf man pages mention file counting.c "coming with libperf package", however, the file is missing. Add the file then. Fixes: 81de3bf37a8b ("libperf: Add man pages") Signed-off-by: Michael Petlan Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra LPU-Reference: 20200227194424.28210-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/examples/counting.c | 83 ++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tools/lib/perf/Documentation/examples/counting.c (limited to 'tools') diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c new file mode 100644 index 000000000000..6085693571ef --- /dev/null +++ b/tools/lib/perf/Documentation/examples/counting.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + +int main(int argc, char **argv) +{ + int count = 100000, err = 0; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_thread_map *threads; + struct perf_counts_values counts; + + struct perf_event_attr attr1 = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + struct perf_event_attr attr2 = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + + libperf_init(libperf_print); + threads = perf_thread_map__new_dummy(); + if (!threads) { + fprintf(stderr, "failed to create threads\n"); + return -1; + } + perf_thread_map__set_pid(threads, 0, 0); + evlist = perf_evlist__new(); + if (!evlist) { + fprintf(stderr, "failed to create evlist\n"); + goto out_threads; + } + evsel = perf_evsel__new(&attr1); + if (!evsel) { + fprintf(stderr, "failed to create evsel1\n"); + goto out_evlist; + } + perf_evlist__add(evlist, evsel); + evsel = perf_evsel__new(&attr2); + if (!evsel) { + fprintf(stderr, "failed to create evsel2\n"); + goto out_evlist; + } + perf_evlist__add(evlist, evsel); + perf_evlist__set_maps(evlist, NULL, threads); + err = perf_evlist__open(evlist); + if (err) { + fprintf(stderr, "failed to open evsel\n"); + goto out_evlist; + } + perf_evlist__enable(evlist); + while (count--); + perf_evlist__disable(evlist); + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts); + fprintf(stdout, "count %llu, enabled %llu, run %llu\n", + counts.val, counts.ena, counts.run); + } + perf_evlist__close(evlist); +out_evlist: + perf_evlist__delete(evlist); +out_threads: + perf_thread_map__put(threads); + return err; +} -- cgit v1.2.3 From 401d61cbd4d4c6b44b2a895ab4073c7f214c096b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 3 Mar 2020 23:18:52 -0500 Subject: tools lib traceevent: Remove extra '\n' in print_event_time() If the precision of print_event_time() is zero or greater than the timestamp, it uses a different format. But that format had an extra new line at the end, and caused the output to not look right: cpus=2 sleep-3946 [001]111264306005 : function: inotify_inode_queue_event sleep-3946 [001]111264307158 : function: __fsnotify_parent sleep-3946 [001]111264307637 : function: inotify_dentry_parent_queue_event sleep-3946 [001]111264307989 : function: fsnotify sleep-3946 [001]111264308401 : function: audit_syscall_exit Fixes: 38847db9740a ("libtraceevent, perf tools: Changes in tep_print_event_* APIs") Signed-off-by: Steven Rostedt (VMware) Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200303231852.6ab6882f@oasis.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index beaa8b8c08ff..e1bd2a93c6db 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5541,7 +5541,7 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s, if (p10 > 1 && p10 < time) trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); else - trace_seq_printf(s, "%12llu\n", time); + trace_seq_printf(s, "%12llu", time); } struct print_event_type { -- cgit v1.2.3 From 1aae4bdd787998ea331a56f3db9d8595790fe2f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 16:32:31 -0800 Subject: bpf: Switch BPF UAPI #define constants used from BPF program side to enums Switch BPF UAPI constants, previously defined as #define macro, to anonymous enum values. This preserves constants values and behavior in expressions, but has added advantaged of being captured as part of DWARF and, subsequently, BTF type info. Which, in turn, greatly improves usefulness of generated vmlinux.h for BPF applications, as it will not require BPF users to copy/paste various flags and constants, which are frequently used with BPF helpers. Only those constants that are used/useful from BPF program side are converted. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200303003233.3496043-2-andriin@fb.com --- include/uapi/linux/bpf.h | 175 +++++++++++++++++++++++++--------------- tools/include/uapi/linux/bpf.h | 177 +++++++++++++++++++++++++---------------- 2 files changed, 219 insertions(+), 133 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 180337fae97e..d6b33ea27bcc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -325,44 +325,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +393,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -3045,72 +3049,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3529,13 +3561,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3614,8 +3647,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3623,12 +3658,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3644,8 +3683,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3717,9 +3758,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7c689f4552dd..d6b33ea27bcc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -325,44 +325,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +393,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -3045,72 +3049,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3529,13 +3561,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3614,8 +3647,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3623,12 +3658,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3644,8 +3683,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3717,9 +3758,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; -- cgit v1.2.3 From 7cb30aaab3f277aa88e20a008faf57e0fb1119ec Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 16:32:32 -0800 Subject: libbpf: Assume unsigned values for BTF_KIND_ENUM Currently, BTF_KIND_ENUM type doesn't record whether enum values should be interpreted as signed or unsigned. In Linux, most enums are unsigned, though, so interpreting them as unsigned matches real world better. Change btf_dump test case to test maximum 32-bit value, instead of negative value. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200303003233.3496043-3-andriin@fb.com --- tools/lib/bpf/btf_dump.c | 8 ++++---- tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index dc451e4de5ad..0c28ee82834b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, /* enumerators share namespace with typedef idents */ dup_cnt = btf_dump_name_dups(d, d->ident_names, name); if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %d,", + btf_dump_printf(d, "\n%s%s___%zu = %u,", pfx(lvl + 1), name, dup_cnt, - (__s32)v->val); + (__u32)v->val); } else { - btf_dump_printf(d, "\n%s%s = %d,", + btf_dump_printf(d, "\n%s%s = %u,", pfx(lvl + 1), name, - (__s32)v->val); + (__u32)v->val); } } btf_dump_printf(d, "\n%s}", pfx(lvl)); diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index d4a02fe44a12..31975c96e2c9 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -13,7 +13,7 @@ enum e1 { enum e2 { C = 100, - D = -100, + D = 4294967295, E = 0, }; -- cgit v1.2.3 From 367d82f17eff53cec531920eb0de9973fac89ec1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 16:32:33 -0800 Subject: tools/runqslower: Drop copy/pasted BPF_F_CURRENT_CPU definiton With BPF_F_CURRENT_CPU being an enum, it is now captured in vmlinux.h and is readily usable by runqslower. So drop local copy/pasted definition in favor of the one coming from vmlinux.h. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200303003233.3496043-4-andriin@fb.com --- tools/bpf/runqslower/runqslower.bpf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 48a39f72fadf..0ba501305bad 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -6,9 +6,6 @@ #define TASK_RUNNING 0 -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK - const volatile __u64 min_us = 0; const volatile pid_t targ_pid = 0; -- cgit v1.2.3 From cc6fa771024ffdb428bdf25a94309cf21e8ef1b9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Mar 2020 10:43:36 -0800 Subject: selftests/bpf: Support out-of-tree vmlinux builds for VMLINUX_BTF Add detection of out-of-tree built vmlinux image for the purpose of VMLINUX_BTF detection. According to Documentation/kbuild/kbuild.rst, O takes precedence over KBUILD_OUTPUT. Also ensure ~/path/to/build/dir also works by relying on wildcard's resolution first, but then applying $(abspath) at the end to also handle O=../../whatever cases. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200304184336.165766-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d7f5df33f04..ee4ad34adb4a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -129,10 +129,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) +VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF:= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ -- cgit v1.2.3 From ae24082331d9bbaae283aafbe930a8f0eb85605a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:49 +0100 Subject: bpf: Introduce BPF_MODIFY_RETURN When multiple programs are attached, each program receives the return value from the previous program on the stack and the last program provides the return value to the attached function. The fmod_ret bpf programs are run after the fentry programs and before the fexit programs. The original function is only called if all the fmod_ret programs return 0 to avoid any unintended side-effects. The success value, i.e. 0 is not currently configurable but can be made so where user-space can specify it at load time. For example: int func_to_be_attached(int a, int b) { <--- do_fentry do_fmod_ret: if (ret != 0) goto do_fexit; original_function: } <--- do_fexit The fmod_ret program attached to this function can be defined as: SEC("fmod_ret/func_to_be_attached") int BPF_PROG(func_name, int a, int b, int ret) { // This will skip the original function logic. return 1; } The first fmod_ret program is passed 0 in its return argument. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-4-kpsingh@chromium.org --- arch/x86/net/bpf_jit_comp.c | 130 +++++++++++++++++++++++++++++++++++++---- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 3 +- kernel/bpf/syscall.c | 1 + kernel/bpf/trampoline.c | 5 +- kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 1 + 8 files changed, 130 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d6349e930b06..b1fd000feb89 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1362,7 +1362,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size) + struct bpf_prog *p, int stack_size, bool mod_ret) { u8 *prog = *pprog; int cnt = 0; @@ -1383,6 +1383,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (emit_call(&prog, p->bpf_func, prog)) return -EINVAL; + /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + * of the previous call which is then passed on the stack to + * the next BPF program. + */ + if (mod_ret) + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); @@ -1442,6 +1449,23 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) return 0; } +static int emit_mod_ret_check_imm8(u8 **pprog, int value) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (!is_imm8(value)) + return -EINVAL; + + if (value == 0) + EMIT2(0x85, add_2reg(0xC0, BPF_REG_0, BPF_REG_0)); + else + EMIT3(0x83, add_1reg(0xF8, BPF_REG_0), value); + + *pprog = prog; + return 0; +} + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_progs *tp, int stack_size) { @@ -1449,9 +1473,49 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, u8 *prog = *pprog; for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size)) + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) + return -EINVAL; + } + *pprog = prog; + return 0; +} + +static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, + struct bpf_tramp_progs *tp, int stack_size, + u8 **branches) +{ + u8 *prog = *pprog; + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit_mov_imm32(&prog, false, BPF_REG_0, 0); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) return -EINVAL; + + /* Generate a branch: + * + * if (ret != 0) + * goto do_fexit; + * + * If needed this can be extended to any integer value which can + * be passed by user-space when the program is loaded. + */ + if (emit_mod_ret_check_imm8(&prog, 0)) + return -EINVAL; + + /* Save the location of the branch and Generate 6 nops + * (4 bytes for an offset and 2 bytes for the jump) These nops + * are replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. + */ + branches[i] = prog; + emit_nops(&prog, 4 + 2); } + *pprog = prog; return 0; } @@ -1521,10 +1585,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, struct bpf_tramp_progs *tprogs, void *orig_call) { - int cnt = 0, nr_args = m->nr_args; + int ret, i, cnt = 0, nr_args = m->nr_args; int stack_size = nr_args * 8; struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + u8 **branches = NULL; u8 *prog; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ @@ -1557,24 +1623,60 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, + branches)) { + ret = -EINVAL; + goto cleanup; + } + } + if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry->nr_progs) + if (fentry->nr_progs || fmod_ret->nr_progs) restore_regs(m, &prog, nr_args, stack_size); /* call original function */ - if (emit_call(&prog, orig_call, prog)) - return -EINVAL; + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); } + if (fmod_ret->nr_progs) { + /* From Intel 64 and IA-32 Architectures Optimization + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler + * Coding Rule 11: All branch targets should be 16-byte + * aligned. + */ + emit_align(&prog, 16); + /* Update the branches saved in invoke_bpf_mod_ret with the + * aligned address of do_fexit. + */ + for (i = 0; i < fmod_ret->nr_progs; i++) + emit_cond_near_jump(&branches[i], prog, branches[i], + X86_JNE); + } + if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, stack_size)) - return -EINVAL; + if (invoke_bpf(m, &prog, fexit, stack_size)) { + ret = -EINVAL; + goto cleanup; + } if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_regs(m, &prog, nr_args, stack_size); + /* This needs to be done regardless. If there were fmod_ret programs, + * the return value is only updated on the stack and still needs to be + * restored to R0. + */ if (flags & BPF_TRAMP_F_CALL_ORIG) /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); @@ -1586,9 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT1(0xC3); /* ret */ /* Make sure the trampoline generation logic doesn't overflow */ - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) - return -EFAULT; - return prog - (u8 *)image; + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; + } + ret = prog - (u8 *)image; + +cleanup: + kfree(branches); + return ret; } static int emit_fallback_jump(u8 **pprog) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98ec10b23dbb..f748b31e5888 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d6b33ea27bcc..40b2d9476268 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 787140095e58..30841fb8b3c0 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3710,7 +3710,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, nr_args--; } - if (prog->expected_attach_type == BPF_TRACE_FEXIT && + if ((prog->expected_attach_type == BPF_TRACE_FEXIT || + prog->expected_attach_type == BPF_MODIFY_RETURN) && arg == nr_args) { if (!t) /* Default prog with 5 args. 6th arg is retval. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 13de65363ba2..7ce0815793dd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2324,6 +2324,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->expected_attach_type != BPF_MODIFY_RETURN && prog->type != BPF_PROG_TYPE_EXT) { err = -EINVAL; goto out_put_prog; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 546198f6f307..221a17af1f81 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -232,7 +232,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } - if (tprogs[BPF_TRAMP_FEXIT].nr_progs) + if (tprogs[BPF_TRAMP_FEXIT].nr_progs || + tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; /* Though the second half of trampoline page is unused a task could be @@ -269,6 +270,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) switch (t) { case BPF_TRACE_FENTRY: return BPF_TRAMP_FENTRY; + case BPF_MODIFY_RETURN: + return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: return BPF_TRAMP_FEXIT; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 289383edfc8c..2460c8e6b5be 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9950,6 +9950,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (!prog_extension) return -EINVAL; /* fallthrough */ + case BPF_MODIFY_RETURN: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d6b33ea27bcc..40b2d9476268 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From aca228cd3387447d99d3ebaee3ebcc2b015a3e46 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:51 +0100 Subject: tools/libbpf: Add support for BPF_MODIFY_RETURN Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-6-kpsingh@chromium.org --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f8c4042e5855..223be01dc466 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6288,6 +6288,10 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FENTRY, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("fmod_ret/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .attach_fn = attach_trace), SEC_DEF("fexit/", TRACING, .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, -- cgit v1.2.3 From da00d2f117a08fbca262db5ea422c80a568b112b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:52 +0100 Subject: bpf: Add test ops for BPF_PROG_TYPE_TRACING The current fexit and fentry tests rely on a different program to exercise the functions they attach to. Instead of doing this, implement the test operations for tracing which will also be used for BPF_MODIFY_RETURN in a subsequent patch. Also, clean up the fexit test to use the generated skeleton. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-7-kpsingh@chromium.org --- include/linux/bpf.h | 10 ++++ kernel/trace/bpf_trace.c | 1 + net/bpf/test_run.c | 37 +++++++++--- .../selftests/bpf/prog_tests/fentry_fexit.c | 12 +--- .../testing/selftests/bpf/prog_tests/fentry_test.c | 14 ++--- .../testing/selftests/bpf/prog_tests/fexit_test.c | 69 +++++++--------------- 6 files changed, 67 insertions(+), 76 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f748b31e5888..40c53924571d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1156,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1313,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 07764c761073..363e0a2c75cf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1266,6 +1266,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = { }; const struct bpf_prog_ops tracing_prog_ops = { + .test_run = bpf_prog_test_run_tracing, }; static bool raw_tp_writable_prog_is_valid_access(int off, int size, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1cd7a1c2f8b2..3600f098e7c6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -160,18 +160,37 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, kfree(data); return ERR_PTR(-EFAULT); } - if (bpf_fentry_test1(1) != 2 || - bpf_fentry_test2(2, 3) != 5 || - bpf_fentry_test3(4, 5, 6) != 15 || - bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || - bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) { - kfree(data); - return ERR_PTR(-EFAULT); - } + return data; } +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + int err = -EFAULT; + + switch (prog->expected_attach_type) { + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + if (bpf_fentry_test1(1) != 2 || + bpf_fentry_test2(2, 3) != 5 || + bpf_fentry_test3(4, 5, 6) != 15 || + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + goto out; + break; + default: + goto out; + } + + err = 0; +out: + trace_bpf_test_finish(&err); + return err; +} + static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 235ac4f67f5b..83493bd5745c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" #include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; - int err, pkt_fd, i; + int err, prog_fd, i; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; @@ -31,8 +26,8 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", @@ -49,7 +44,6 @@ void test_fentry_fexit(void) } close_prog: - test_pkt_access__destroy(pkt_skel); fentry_test__destroy(fentry_skel); fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 5cc06021f27d..04ebbf1cb390 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,20 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" void test_fentry_test(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; - int err, pkt_fd, i; + int err, prog_fd, i; __u32 duration = 0, retval; __u64 *result; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto cleanup; @@ -23,10 +18,10 @@ void test_fentry_test(void) if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto cleanup; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fentry_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); @@ -39,5 +34,4 @@ void test_fentry_test(void) cleanup: fentry_test__destroy(fentry_skel); - test_pkt_access__destroy(pkt_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d2c3655dd7a3..78d7a2765c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,64 +1,37 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include +#include "fexit_test.skel.h" void test_fexit_test(void) { - struct bpf_prog_load_attr attr = { - .file = "./fexit_test.o", - }; - - char prog_name[] = "fexit/bpf_fentry_testX"; - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[6] = {}; - struct bpf_program *prog[6]; + struct fexit_test *fexit_skel = NULL; + int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - u64 result[6]; + __u64 *result; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) - return; - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) - goto close_prog; + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; - for (i = 0; i < 6; i++) { - prog_name[sizeof(prog_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) - goto close_prog; + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 6; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i + 1, result[i])) - goto close_prog; + result = (__u64 *)fexit_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } -close_prog: - for (i = 0; i < 6; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj); - bpf_object__close(pkt_obj); +cleanup: + fexit_test__destroy(fexit_skel); } -- cgit v1.2.3 From 3d08b6f29cf33aeaf301553d8d3805f0aa609df7 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:53 +0100 Subject: bpf: Add selftests for BPF_MODIFY_RETURN Test for two scenarios: * When the fmod_ret program returns 0, the original function should be called along with fentry and fexit programs. * When the fmod_ret program returns a non-zero value, the original function should not be called, no side effect should be observed and fentry and fexit programs should be called. The result from the kernel function call and whether a side-effect is observed is returned via the retval attr of the BPF_PROG_TEST_RUN (bpf) syscall. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-8-kpsingh@chromium.org --- net/bpf/test_run.c | 22 +++++++- .../selftests/bpf/prog_tests/modify_return.c | 65 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/modify_return.c | 49 ++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/modify_return.c create mode 100644 tools/testing/selftests/bpf/progs/modify_return.c (limited to 'tools') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 3600f098e7c6..4c921f5154e0 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,6 +10,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +int noinline bpf_modify_return_test(int a, int *b) +{ + *b += 1; + return a + *b; +} + +ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { @@ -168,7 +177,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - int err = -EFAULT; + u16 side_effect = 0, ret = 0; + int b = 2, err = -EFAULT; + u32 retval = 0; switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: @@ -181,10 +192,19 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) goto out; break; + case BPF_MODIFY_RETURN: + ret = bpf_modify_return_test(1, &b); + if (b != 2) + side_effect = 1; + break; default: goto out; } + retval = ((u32)side_effect << 16) | ret; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + err = 0; out: trace_bpf_test_finish(&err); diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c new file mode 100644 index 000000000000..97fec70c600b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include "modify_return.skel.h" + +#define LOWER(x) ((x) & 0xffff) +#define UPPER(x) ((x) >> 16) + + +static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) +{ + struct modify_return *skel = NULL; + int err, prog_fd; + __u32 duration = 0, retval; + __u16 side_effect; + __s16 ret; + + skel = modify_return__open_and_load(); + if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + goto cleanup; + + err = modify_return__attach(skel); + if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + goto cleanup; + + skel->bss->input_retval = input_retval; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, + &retval, &duration); + + CHECK(err, "test_run", "err %d errno %d\n", err, errno); + + side_effect = UPPER(retval); + ret = LOWER(retval); + + CHECK(ret != want_ret, "test_run", + "unexpected ret: %d, expected: %d\n", ret, want_ret); + CHECK(side_effect != want_side_effect, "modify_return", + "unexpected side_effect: %d\n", side_effect); + + CHECK(skel->bss->fentry_result != 1, "modify_return", + "fentry failed\n"); + CHECK(skel->bss->fexit_result != 1, "modify_return", + "fexit failed\n"); + CHECK(skel->bss->fmod_ret_result != 1, "modify_return", + "fmod_ret failed\n"); + +cleanup: + modify_return__destroy(skel); +} + +void test_modify_return(void) +{ + run_test(0 /* input_retval */, + 1 /* want_side_effect */, + 4 /* want_ret */); + run_test(-EINVAL /* input_retval */, + 0 /* want_side_effect */, + -EINVAL /* want_ret */); +} + diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c new file mode 100644 index 000000000000..8b7466a15c6b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/modify_return.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +static int sequence = 0; +__s32 input_retval = 0; + +__u64 fentry_result = 0; +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(fentry_test, int a, __u64 b) +{ + sequence++; + fentry_result = (sequence == 1); + return 0; +} + +__u64 fmod_ret_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) +{ + sequence++; + /* This is the first fmod_ret program, the ret passed should be 0 */ + fmod_ret_result = (sequence == 2 && ret == 0); + return input_retval; +} + +__u64 fexit_result = 0; +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, __u64 b, int ret) +{ + sequence++; + /* If the input_reval is non-zero a successful modification should have + * occurred. + */ + if (input_retval) + fexit_result = (sequence == 3 && ret == input_retval); + else + fexit_result = (sequence == 3 && ret == 4); + + return 0; +} -- cgit v1.2.3 From 51891498f2da78ee64dfad88fa53c9e85fb50abf Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 4 Mar 2020 11:05:17 -0700 Subject: seccomp: allow TSYNC and USER_NOTIF together The restriction introduced in 7a0df7fbc145 ("seccomp: Make NEW_LISTENER and TSYNC flags exclusive") is mostly artificial: there is enough information in a seccomp user notification to tell which thread triggered a notification. The reason it was introduced is because TSYNC makes the syscall return a thread-id on failure, and NEW_LISTENER returns an fd, and there's no way to distinguish between these two cases (well, I suppose the caller could check all fds it has, then do the syscall, and if the return value was an fd that already existed, then it must be a thread id, but bleh). Matthew would like to use these two flags together in the Chrome sandbox which wants to use TSYNC for video drivers and NEW_LISTENER to proxy syscalls. So, let's fix this ugliness by adding another flag, TSYNC_ESRCH, which tells the kernel to just return -ESRCH on a TSYNC error. This way, NEW_LISTENER (and any subsequent seccomp() commands that want to return positive values) don't conflict with each other. Suggested-by: Matthew Denton Signed-off-by: Tycho Andersen Link: https://lore.kernel.org/r/20200304180517.23867-1-tycho@tycho.ws Signed-off-by: Kees Cook --- include/linux/seccomp.h | 3 +- include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 14 +++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 74 ++++++++++++++++++++++++++- 4 files changed, 86 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 03583b6d1416..4192369b8418 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -7,7 +7,8 @@ #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ - SECCOMP_FILTER_FLAG_NEW_LISTENER) + SECCOMP_FILTER_FLAG_NEW_LISTENER | \ + SECCOMP_FILTER_FLAG_TSYNC_ESRCH) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index be84d87f1f46..c1735455bc53 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -22,6 +22,7 @@ #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) /* * All BPF programs must return a 32-bit value. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b6ea3dcb57bf..29022c1bbe18 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -528,8 +528,12 @@ static long seccomp_attach_filter(unsigned int flags, int ret; ret = seccomp_can_sync_threads(); - if (ret) - return ret; + if (ret) { + if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) + return -ESRCH; + else + return ret; + } } /* Set log flag, if present. */ @@ -1288,10 +1292,12 @@ static long seccomp_set_mode_filter(unsigned int flags, * In the successful case, NEW_LISTENER returns the new listener fd. * But in the failure case, TSYNC returns the thread that died. If you * combine these two flags, there's no way to tell whether something - * succeeded or failed. So, let's disallow this combination. + * succeeded or failed. So, let's disallow this combination if the user + * has not explicitly requested no errors from TSYNC. */ if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && - (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) && + ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0)) return -EINVAL; /* Prepare the new filter before holding any locks. */ diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index ee1b727ede04..a9ad3bd8b2ad 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -212,6 +212,10 @@ struct seccomp_notif_sizes { #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 #endif +#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags) unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_SPEC_ALLOW, - SECCOMP_FILTER_FLAG_NEW_LISTENER }; + SECCOMP_FILTER_FLAG_NEW_LISTENER, + SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; unsigned int exclusive[] = { SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_FILTER_FLAG_NEW_LISTENER }; @@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); } +TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) +{ + long ret, flags; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + flags = SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_TSYNC_ESRCH; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); + ASSERT_EQ(ESRCH, errno) { + TH_LOG("Did not return ESRCH for diverged sibling."); + } + ASSERT_EQ(-1, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + PTHREAD_JOIN(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + PTHREAD_JOIN(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + TEST_F(TSYNC, two_siblings_not_under_filter) { long ret, sib; @@ -3196,6 +3250,24 @@ TEST(user_notification_basic) EXPECT_EQ(0, WEXITSTATUS(status)); } +TEST(user_notification_with_tsync) +{ + int ret; + unsigned int flags; + + /* these were exclusive */ + flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_TSYNC; + ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags)); + ASSERT_EQ(EINVAL, errno); + + /* but now they're not */ + flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; + ret = user_trap_syscall(__NR_getppid, flags); + close(ret); + ASSERT_LE(0, ret); +} + TEST(user_notification_kill_in_middle) { pid_t pid; -- cgit v1.2.3 From 6339998d22ecae5d6435dd87b4904ff6e16bfe56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Mar 2020 10:50:58 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: bbfd5e4fab63 ("perf/core: Add new branch sample type for HW index of raw branch records") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h This update is a prerequisite to adding support for the HW index of raw branch records. Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra (Intel) Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200304134902.GB12612@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 377d794d3105..397cfd65b3fe 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3 From c4ef2f3256e3bc008f98121cad39ee5467db07a6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 4 Mar 2020 11:11:05 -0800 Subject: selftests/bpf: Add send_signal_sched_switch test Added one test, send_signal_sched_switch, to test bpf_send_signal() helper triggered by sched/sched_switch tracepoint. This test can be used to verify kernel deadlocks fixed by the previous commit. The test itself is heavily borrowed from Commit eac9153f2b58 ("bpf/stackmap: Fix deadlock with rq_lock in bpf_get_stack()"). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: Song Liu Link: https://lore.kernel.org/bpf/20200304191105.2796601-1-yhs@fb.com --- .../bpf/prog_tests/send_signal_sched_switch.c | 60 ++++++++++++++++++++++ .../selftests/bpf/progs/test_send_signal_kern.c | 6 +++ 2 files changed, 66 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c new file mode 100644 index 000000000000..189a34a7addb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_send_signal_kern.skel.h" + +static void sigusr1_handler(int signum) +{ +} + +#define THREAD_COUNT 100 + +static void *worker(void *p) +{ + int i; + + for ( i = 0; i < 1000; i++) + usleep(1); + + return NULL; +} + +void test_send_signal_sched_switch(void) +{ + struct test_send_signal_kern *skel; + pthread_t threads[THREAD_COUNT]; + u32 duration = 0; + int i, err; + + signal(SIGUSR1, sigusr1_handler); + + skel = test_send_signal_kern__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n")) + return; + + skel->bss->pid = getpid(); + skel->bss->sig = SIGUSR1; + + err = test_send_signal_kern__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) + goto destroy_skel; + + for (i = 0; i < THREAD_COUNT; i++) { + err = pthread_create(threads + i, NULL, worker, NULL); + if (CHECK(err, "pthread_create", "Error creating thread, %s\n", + strerror(errno))) + goto destroy_skel; + } + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(threads[i], NULL); + +destroy_skel: + test_send_signal_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 1acc91e87bfc..b4233d3efac2 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -31,6 +31,12 @@ int send_signal_tp(void *ctx) return bpf_send_signal_test(ctx); } +SEC("tracepoint/sched/sched_switch") +int send_signal_tp_sched(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + SEC("perf_event") int send_signal_perf(void *ctx) { -- cgit v1.2.3 From b9b72999eb86e650489a4321c8e95a4e2e3df20c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Mar 2020 09:16:44 +0200 Subject: selftests: forwarding: ETS: Use Qdisc counters Currently the SW-datapath ETS selftests use "ip link" stats to obtain the number of packets that went through a given band. mlxsw then uses ethtool per-priority counters. Instead, change both to use qdiscs. In SW datapath this is the obvious choice, and now that mlxsw offloads FIFO, this should work on the offloaded datapath as well. This has the effect of verifying that the FIFO offload works. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh | 14 +++++++++++--- tools/testing/selftests/net/forwarding/lib.sh | 10 ++++++++++ tools/testing/selftests/net/forwarding/sch_ets.sh | 9 ++++++--- tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 10 +++------- 4 files changed, 30 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index c9fc4d4885c1..94c37124a840 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -56,11 +56,19 @@ switch_destroy() } # Callback from sch_ets_tests.sh -get_stats() +collect_stats() { - local band=$1; shift + local -a streams=("$@") + local stream - ethtool_stats_get "$h2" rx_octets_prio_$band + # Wait for qdisc counter update so that we don't get it mid-way through. + busywait_for_counter 1000 +1 \ + qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \ + > /dev/null + + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done } bail_on_lldpad diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7ecce65d08f9..a4a7879b3bb9 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -655,6 +655,16 @@ qdisc_stats_get() | jq '.[] | select(.handle == "'"$handle"'") | '"$selector" } +qdisc_parent_stats_get() +{ + local dev=$1; shift + local parent=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" invisible \ + | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" +} + humanize() { local speed=$1; shift diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 40e0ad1bc4f2..e60c8b4818cc 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -34,11 +34,14 @@ switch_destroy() } # Callback from sch_ets_tests.sh -get_stats() +collect_stats() { - local stream=$1; shift + local -a streams=("$@") + local stream - link_stats_get $h2.1$stream rx bytes + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done } ets_run diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 3c3b204d47e8..cdf689e99458 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -2,7 +2,7 @@ # Global interface: # $put -- port under test (e.g. $swp2) -# get_stats($band) -- A function to collect stats for band +# collect_stats($streams...) -- A function to get stats for individual streams # ets_start_traffic($band) -- Start traffic for this band # ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc @@ -94,15 +94,11 @@ __ets_dwrr_test() sleep 10 - t0=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t0=($(collect_stats "${streams[@]}")) sleep 10 - t1=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t1=($(collect_stats "${streams[@]}")) d=($(for ((i = 0; i < ${#streams[@]}; i++)); do echo $((${t1[$i]} - ${t0[$i]})) done)) -- cgit v1.2.3 From 52ef8108396f277083ce384f72a49b073225d959 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 4 Mar 2020 07:55:46 -0500 Subject: tc-testing: list kernel options for basic filter with canid ematch. Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/config | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 477bc61b374a..c812faa29f36 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -31,6 +31,7 @@ CONFIG_NET_EMATCH_U32=m CONFIG_NET_EMATCH_META=m CONFIG_NET_EMATCH_TEXT=m CONFIG_NET_EMATCH_IPSET=m +CONFIG_NET_EMATCH_CANID=m CONFIG_NET_EMATCH_IPT=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m @@ -57,3 +58,8 @@ CONFIG_NET_IFE_SKBMARK=m CONFIG_NET_IFE_SKBPRIO=m CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y + +# +## Network testing +# +CONFIG_CAN=m -- cgit v1.2.3 From 9dd620afd1544a0d78233a80c6c7a48dbbff3ba7 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 4 Mar 2020 07:55:47 -0500 Subject: tc-testing: updated tdc tests for basic filter with canid extended match rules Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/basic.json | 220 +++++++++++++++++++++ 1 file changed, 220 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index afb9187b46a7..e788c114a484 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -1054,5 +1054,225 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "b2b6", + "name": "Add basic filter with canid ematch and single SFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f67f", + "name": "Add basic filter with canid ematch and single SFF with mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaabb:0x00ff)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x2BB:0xFF\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bd5c", + "name": "Add basic filter with canid ematch and multiple SFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1 sff 2 sff 3)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "83c7", + "name": "Add basic filter with canid ematch and multiple SFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "a8f5", + "name": "Add basic filter with canid ematch and single EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "98ae", + "name": "Add basic filter with canid ematch and single EFF with mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaabb:0xf1f1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAABB:0xF1F1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6056", + "name": "Add basic filter with canid ematch and multiple EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1 eff 2 eff 3)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "d188", + "name": "Add basic filter with canid ematch and multiple EFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "25d1", + "name": "Add basic filter with canid ematch and a combination of SFF/EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01 eff 0x02)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2 sff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "b438", + "name": "Add basic filter with canid ematch and a combination of SFF/EFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01:0xf eff 0x02:0xf)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] -- cgit v1.2.3 From 93e5fbb18cec70b3b5c614f67b65388829113bdd Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Thu, 5 Mar 2020 15:44:13 -0800 Subject: selftests: bpf: Add test for JMP32 JSET BPF_X with upper bits set The existing tests attempt to check that JMP32 JSET ignores the upper bits in the operand registers. However, the tests missed one such bug in the x32 JIT that is only uncovered when a previous instruction pollutes the upper 32 bits of the registers. This patch adds a new test case that catches the bug by first executing a 64-bit JSET to pollute the upper 32-bits of the temporary registers, followed by a 32-bit JSET which should ignore the upper 32 bits. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200305234416.31597-2-luke.r.nels@gmail.com --- tools/testing/selftests/bpf/verifier/jmp32.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index bf0322eb5346..bd5cae4a7f73 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -61,6 +61,21 @@ }, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "jset32: ignores upper bits", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_7, 0x8000000000000000), + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, { "jset32: min/max deduction", .insns = { -- cgit v1.2.3 From be40920fbf1003c38ccdc02b571e01a75d890c82 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 7 Mar 2020 03:32:58 +0900 Subject: tools: Let O= makes handle a relative path with -C option When I tried to compile tools/perf from the top directory with the -C option, the O= option didn't work correctly if I passed a relative path: $ make O=BUILD -C tools/perf/ make: Entering directory '/home/mhiramat/ksrc/linux/tools/perf' BUILD: Doing 'make -j8' parallel build ../scripts/Makefile.include:4: *** O=/home/mhiramat/ksrc/linux/tools/perf/BUILD does not exist. Stop. make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/mhiramat/ksrc/linux/tools/perf' The O= directory existence check failed because the check script ran in the build target directory instead of the directory where I ran the make command. To fix that, once change directory to $(PWD) and check O= directory, since the PWD is set to where the make command runs. Fixes: c883122acc0d ("perf tools: Let O= makes handle relative paths") Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Cc: Andrew Morton Cc: Borislav Petkov Cc: Geert Uytterhoeven Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Michal Marek Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/158351957799.3363.15269768530697526765.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- tools/scripts/Makefile.include | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7902a5681fc8..b8fc7d972be9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,7 +35,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O) || echo $(O)) + FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O)) endif # diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ded7a950dc40..6d2f3a1b2249 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 ifneq ($(O),) ifeq ($(origin O), command line) - dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) - ABSOLUTE_O := $(shell cd $(O) ; pwd) + dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) -- cgit v1.2.3 From db2c549407d4a76563c579e4768f7d6d32afefba Mon Sep 17 00:00:00 2001 From: disconnect3d Date: Mon, 9 Mar 2020 11:48:53 +0100 Subject: perf map: Fix off by one in strncpy() size argument This patch fixes an off-by-one error in strncpy size argument in tools/perf/util/map.c. The issue is that in: strncmp(filename, "/system/lib/", 11) the passed string literal: "/system/lib/" has 12 bytes (without the NULL byte) and the passed size argument is 11. As a result, the logic won't match the ending "/" byte and will pass filepaths that are stored in other directories e.g. "/system/libmalicious/bin" or just "/system/libmalicious". This functionality seems to be present only on Android. I assume the /system/ directory is only writable by the root user, so I don't think this bug has much (or any) security impact. Fixes: eca818369996 ("perf tools: Add automatic remapping of Android libraries") Signed-off-by: disconnect3d Cc: Alexander Shishkin Cc: Changbin Du Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Michael Lentine Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200309104855.3775-1-dominik.b.czarnota@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 95428511300d..b342f744b1fc 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (!strncmp(filename, "/system/lib/", 12)) { char *ndk, *app; const char *arch; size_t ndk_length; -- cgit v1.2.3 From a7ffd416d80497f03d1f62c0b330cff76a86d5ad Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Mon, 9 Mar 2020 10:56:17 +0200 Subject: perf python: Fix clang detection when using CC=clang-version Currently, the setup.py script detects the clang compiler only when invoked with CC=clang. But when using a specific version (e.g. CC=clang-11), this doesn't work correctly and wrong compiler flags are set, leading to build errors. To properly detect clang, invoke the compiler with -v and check the output. The first line should start with "clang version ...". Committer testing: $ make CC=clang-9 O=/tmp/build/perf -C tools/perf install-bin $ readelf -wi /tmp/build/perf/python/perf.cpython-37m-x86_64-linux-gnu.so | grep DW_AT_producer | head -1 DW_AT_producer : (indirect string, offset: 0x0): clang version 9.0.1 (Fedora 9.0.1-2.fc31) /usr/bin/clang-9 -Wno-unused-result -Wsign-compare -D DYNAMIC_ANNOTATIONS_ENABLED=1 -D NDEBUG -O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-command-line -m64 -mtune=generic -fasynchronous-unwind-tables -fcf-protection=full -D _GNU_SOURCE -fPIC -fwrapv -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k -Winit-self -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-system-headers -Wold-style-definition -Wpacked -Wredundant-decls -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wundef -Wwrite-strings -Wformat -Wshadow -D HAVE_ARCH_X86_64_SUPPORT -I /tmp/build/perf/arch/x86/include/generated -D HAVE_SYSCALL_TABLE_SUPPORT -D HAVE_PERF_REGS_SUPPORT -D HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -Werror -O3 -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 -fstack-protector-all -D _FORTIFY_SOURCE=2 -D _LARGEFILE64_SOURCE -D _FILE_OFFSET_BITS=64 -D _GNU_SOURCE -I /home/acme/git/perf/tools/lib/perf/include -I /home/acme/git/perf/tools/perf/util/include -I /home/acme/git/perf/tools/perf/arch/x86/include -I /home/acme/git/perf/tools/include/ -I /home/acme/git/perf/tools/arch/x86/include/uapi -I /home/acme/git/perf/tools/include/uapi -I /home/acme/git/perf/tools/arch/x86/include/ -I /home/acme/git/perf/tools/arch/x86/ -I /tmp/build/perf//util -I /tmp/build/perf/ -I /home/acme/git/perf/tools/perf/util -I /home/acme/git/perf/tools/perf -I /home/acme/git/perf/tools/lib/ -D HAVE_PTHREAD_ATTR_SETAFFINITY_NP -D HAVE_PTHREAD_BARRIER -D HAVE_EVENTFD -D HAVE_GET_CURRENT_DIR_NAME -D HAVE_GETTID -D HAVE_DWARF_GETLOCATIONS_SUPPORT -D HAVE_GLIBC_SUPPORT -D HAVE_AIO_SUPPORT -D HAVE_SCHED_GETCPU_SUPPORT -D HAVE_SETNS_SUPPORT -D HAVE_LIBELF_SUPPORT -D HAVE_LIBELF_MMAP_SUPPORT -D HAVE_ELF_GETPHDRNUM_SUPPORT -D HAVE_GELF_GETNOTE_SUPPORT -D HAVE_ELF_GETSHDRSTRNDX_SUPPORT -D HAVE_DWARF_SUPPORT -D HAVE_LIBBPF_SUPPORT -D HAVE_BPF_PROLOGUE -D HAVE_SDT_EVENT -D HAVE_JITDUMP -D HAVE_DWARF_UNWIND_SUPPORT -D NO_LIBUNWIND_DEBUG_FRAME -D HAVE_LIBUNWIND_SUPPORT -D HAVE_LIBCRYPTO_SUPPORT -D HAVE_SLANG_SUPPORT -D HAVE_GTK2_SUPPORT -D NO_LIBPERL -D HAVE_TIMERFD_SUPPORT -D HAVE_LIBPYTHON_SUPPORT -D HAVE_CPLUS_DEMANGLE_SUPPORT -D HAVE_LIBBFD_SUPPORT -D HAVE_ZLIB_SUPPORT -D HAVE_LZMA_SUPPORT -D HAVE_ZSTD_SUPPORT -D HAVE_LIBCAP_SUPPORT -D HAVE_BACKTRACE_SUPPORT -D HAVE_LIBNUMA_SUPPORT -D HAVE_KVM_STAT_SUPPORT -D DISASM_FOUR_ARGS_SIGNATURE -D HAVE_LIBBABELTRACE_SUPPORT -D HAVE_AUXTRACE_SUPPORT -D HAVE_JVMTI_CMLR -I /tmp/build/perf/ -fPIC -I util/include -I /usr/include/python3.7m -c /home/acme/git/perf/tools/perf/util/python.c -o /tmp/build/perf/python_ext_build/tmp/home/acme/git/perf/tools/perf/util/python.o -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k -Winit-self -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-system-headers -Wold-style-definition -Wpacked -Wredundant-decls -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wundef -Wwrite-strings -Wformat -Wshadow -D HAVE_ARCH_X86_64_SUPPORT -I /tmp/build/perf/arch/x86/include/generated -D HAVE_SYSCALL_TABLE_SUPPORT -D HAVE_PERF_REGS_SUPPORT -D HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -Werror -O3 -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 -fstack-protector-all -D _FORTIFY_SOURCE=2 -D _LARGEFILE64_SOURCE -D _FILE_OFFSET_BITS=64 -D _GNU_SOURCE -I /home/acme/git/perf/tools/lib/perf/include -I /home/acme/git/perf/tools/perf/util/include -I /home/acme/git/perf/tools/perf/arch/x86/include -I /home/acme/git/perf/tools/include/ -I /home/acme/git/perf/tools/arch/x86/include/uapi -I /home/acme/git/perf/tools/include/uapi -I /home/acme/git/perf/tools/arch/x86/include/ -I /home/acme/git/perf/tools/arch/x86/ -I /tmp/build/perf//util -I /tmp/build/perf/ -I /home/acme/git/perf/tools/perf/util -I /home/acme/git/perf/tools/perf -I /home/acme/git/perf/tools/lib/ -D HAVE_PTHREAD_ATTR_SETAFFINITY_NP -D HAVE_PTHREAD_BARRIER -D HAVE_EVENTFD -D HAVE_GET_CURRENT_DIR_NAME -D HAVE_GETTID -D HAVE_DWARF_GETLOCATIONS_SUPPORT -D HAVE_GLIBC_SUPPORT -D HAVE_AIO_SUPPORT -D HAVE_SCHED_GETCPU_SUPPORT -D HAVE_SETNS_SUPPORT -D HAVE_LIBELF_SUPPORT -D HAVE_LIBELF_MMAP_SUPPORT -D HAVE_ELF_GETPHDRNUM_SUPPORT -D HAVE_GELF_GETNOTE_SUPPORT -D HAVE_ELF_GETSHDRSTRNDX_SUPPORT -D HAVE_DWARF_SUPPORT -D HAVE_LIBBPF_SUPPORT -D HAVE_BPF_PROLOGUE -D HAVE_SDT_EVENT -D HAVE_JITDUMP -D HAVE_DWARF_UNWIND_SUPPORT -D NO_LIBUNWIND_DEBUG_FRAME -D HAVE_LIBUNWIND_SUPPORT -D HAVE_LIBCRYPTO_SUPPORT -D HAVE_SLANG_SUPPORT -D HAVE_GTK2_SUPPORT -D NO_LIBPERL -D HAVE_TIMERFD_SUPPORT -D HAVE_LIBPYTHON_SUPPORT -D HAVE_CPLUS_DEMANGLE_SUPPORT -D HAVE_LIBBFD_SUPPORT -D HAVE_ZLIB_SUPPORT -D HAVE_LZMA_SUPPORT -D HAVE_ZSTD_SUPPORT -D HAVE_LIBCAP_SUPPORT -D HAVE_BACKTRACE_SUPPORT -D HAVE_LIBNUMA_SUPPORT -D HAVE_KVM_STAT_SUPPORT -D DISASM_FOUR_ARGS_SIGNATURE -D HAVE_LIBBABELTRACE_SUPPORT -D HAVE_AUXTRACE_SUPPORT -D HAVE_JVMTI_CMLR -I /tmp/build/perf/ -fno-strict-aliasing -Wno-write-strings -Wno-unused-parameter -Wno-redundant-decls $ And here is how tools/perf/util/setup.py checks if the used clang has options that the distro specific python extension building compiler defaults: if cc_is_clang: from distutils.sysconfig import get_config_vars vars = get_config_vars() for var in ('CFLAGS', 'OPT'): vars[var] = sub("-specs=[^ ]+", "", vars[var]) if not clang_has_option("-mcet"): vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) if not clang_has_option("-fstack-clash-protection"): vars[var] = sub("-fstack-clash-protection", "", vars[var]) if not clang_has_option("-fstack-protector-strong"): vars[var] = sub("-fstack-protector-strong", "", vars[var]) So "-fcf-protection=full" is used, clang-9 has this option and thus it was kept, the perf python extension was built with it and the build completed successfully. Link: https://github.com/ClangBuiltLinux/linux/issues/903 Signed-off-by: Ilie Halip Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Igor Lubashev Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200309085618.14307-1-ilie.halip@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index aa344a163eaf..8a065a6f9713 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -2,11 +2,13 @@ from os import getenv from subprocess import Popen, PIPE from re import sub +cc = getenv("CC") +cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() + def clang_has_option(option): - return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] -cc = getenv("CC") -if cc == "clang": +if cc_is_clang: from distutils.sysconfig import get_config_vars vars = get_config_vars() for var in ('CFLAGS', 'OPT'): @@ -40,7 +42,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] -if cc != "clang": +if not cc_is_clang: cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' -- cgit v1.2.3 From 05e54e2386733dfdb62b6784b3d6e1b0bd9bb559 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Mar 2020 23:31:21 -0800 Subject: perf parse-events: Fix reading of invalid memory in event parsing ADD_CONFIG_TERM accesses term->weak, however, in get_config_chgs this value is accessed outside of the list_for_each_entry and references invalid memory. Add an argument for ADD_CONFIG_TERM for weak and set it to false in the get_config_chgs case. This bug was cause by clang's address sanitizer and libfuzzer. It can be reproduced with a command line of: perf stat -a -e i/bs,tsc,L2/o Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200307073121.203816-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a14995835d85..a7dc0b096974 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1213,7 +1213,7 @@ static int config_attr(struct perf_event_attr *attr, static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { -#define ADD_CONFIG_TERM(__type) \ +#define ADD_CONFIG_TERM(__type, __weak) \ struct perf_evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ @@ -1222,18 +1222,18 @@ static int get_config_terms(struct list_head *head_config, \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ - __t->weak = term->weak; \ + __t->weak = __weak; \ list_add_tail(&__t->list, head_terms) -#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \ +#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \ do { \ - ADD_CONFIG_TERM(__type); \ + ADD_CONFIG_TERM(__type, __weak); \ __t->val.__name = __val; \ } while (0) -#define ADD_CONFIG_TERM_STR(__type, __val) \ +#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \ do { \ - ADD_CONFIG_TERM(__type); \ + ADD_CONFIG_TERM(__type, __weak); \ __t->val.str = strdup(__val); \ if (!__t->val.str) { \ zfree(&__t); \ @@ -1247,62 +1247,62 @@ do { \ list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num); + ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num); + ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM_VAL(TIME, time, term->val.num); + ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str); + ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - ADD_CONFIG_TERM_STR(BRANCH, term->val.str); + ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: ADD_CONFIG_TERM_VAL(STACK_USER, stack_user, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: ADD_CONFIG_TERM_VAL(INHERIT, inherit, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM_VAL(INHERIT, inherit, - term->val.num ? 0 : 1); + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, - term->val.num ? 0 : 1); + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: - ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str); + ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_PERCORE: ADD_CONFIG_TERM_VAL(PERCORE, percore, - term->val.num ? true : false); + term->val.num ? true : false, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, - term->val.num); + term->val.num, term->weak); break; default: break; @@ -1339,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, } if (bits) - ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits); + ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false); #undef ADD_CONFIG_TERM return 0; -- cgit v1.2.3 From 6b8d68f1ce9266b05a55e93c62923ff51daae4c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Feb 2020 16:57:42 +0900 Subject: perf probe: Fix to delete multiple probe event When we put an event with multiple probes, perf-probe fails to delete with filters. This comes from a failure to list up the event name because of overwrapping its name. To fix this issue, skip to list up the event which has same name. Without this patch: # perf probe -l \* probe_perf:map__map_ip (on perf_sample__fprintf_brstackoff:21@ probe_perf:map__map_ip (on perf_sample__fprintf_brstackoff:25@ probe_perf:map__map_ip (on append_inlines:12@util/machine.c in probe_perf:map__map_ip (on unwind_entry:19@util/machine.c in / probe_perf:map__map_ip (on map__map_ip@util/map.h in /home/mhi probe_perf:map__map_ip (on map__map_ip@util/map.h in /home/mhi # perf probe -d \* "*" does not hit any event. Error: Failed to delete events. Reason: No such file or directory (Code: -2) With it: # perf probe -d \* Removed event: probe_perf:map__map_ip # Fixes: 72363540c009 ("perf probe: Support multiprobe event") Reported-by: Arnaldo Carvalho de Melo Reported-by: He Zhe Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/158287666197.16697.7514373548551863562.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 0f5fda11675f..8c852948513e 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group) } else ret = strlist__add(sl, tev.event); clear_probe_trace_event(&tev); + /* Skip if there is same name multi-probe event in the list */ + if (ret == -EEXIST) + ret = 0; if (ret < 0) break; } -- cgit v1.2.3 From 1efde2754275dbd9d11c6e0132a4f09facf297ab Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Feb 2020 00:42:01 +0900 Subject: perf probe: Do not depend on dwfl_module_addrsym() Do not depend on dwfl_module_addrsym() because it can fail on user-space shared libraries. Actually, same bug was fixed by commit 664fee3dc379 ("perf probe: Do not use dwfl_module_addrsym if dwarf_diename finds symbol name"), but commit 07d369857808 ("perf probe: Fix wrong address verification) reverted to get actual symbol address from symtab. This fixes it again by getting symbol address from DIE, and only if the DIE has only address range, it uses dwfl_module_addrsym(). Fixes: 07d369857808 ("perf probe: Fix wrong address verification) Reported-by: Alexandre Ghiti Signed-off-by: Masami Hiramatsu Tested-by: Alexandre Ghiti Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sasha Levin Link: http://lore.kernel.org/lkml/158281812176.476.14164573830975116234.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1c817add6ca4..e4cff49384f4 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -637,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, return -EINVAL; } - /* Try to get actual symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (dwarf_entrypc(sp_die, &eaddr) == 0) { + /* If the DIE has entrypc, use it. */ + symbol = dwarf_diename(sp_die); + } else { + /* Try to get actual symbol name and address from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + eaddr = sym.st_value; + } if (!symbol) { pr_warning("Failed to find symbol at 0x%lx\n", (unsigned long)paddr); return -ENOENT; } - eaddr = sym.st_value; tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; -- cgit v1.2.3 From b05fbb9f03f15134735f9d2dcc7d067092ec9dd2 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:40 +0000 Subject: selftests: bpf: Don't listen() on UDP sockets Most tests for TCP sockmap can be adapted to UDP sockmap if the listen call is skipped. Rename listen_loopback, etc. to socket_loopback and skip listen() for SOCK_DGRAM. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-10-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 47 ++++++++++++---------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index b1b2acea0638..4ba41dd26d6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -230,7 +230,7 @@ static int enable_reuseport(int s, int progfd) return 0; } -static int listen_loopback_reuseport(int family, int sotype, int progfd) +static int socket_loopback_reuseport(int family, int sotype, int progfd) { struct sockaddr_storage addr; socklen_t len; @@ -249,6 +249,9 @@ static int listen_loopback_reuseport(int family, int sotype, int progfd) if (err) goto close; + if (sotype == SOCK_DGRAM) + return s; + err = xlisten(s, SOMAXCONN); if (err) goto close; @@ -259,9 +262,9 @@ close: return -1; } -static int listen_loopback(int family, int sotype) +static int socket_loopback(int family, int sotype) { - return listen_loopback_reuseport(family, sotype, -1); + return socket_loopback_reuseport(family, sotype, -1); } static void test_insert_invalid(int family, int sotype, int mapfd) @@ -333,7 +336,7 @@ static void test_insert_listening(int family, int sotype, int mapfd) u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -349,7 +352,7 @@ static void test_delete_after_insert(int family, int sotype, int mapfd) u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -366,7 +369,7 @@ static void test_delete_after_close(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -390,7 +393,7 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd) u32 key; int s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -417,7 +420,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -439,7 +442,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd) u32 key, value32; int err, s; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -470,11 +473,11 @@ static void test_update_listening(int family, int sotype, int mapfd) u64 value; u32 key; - s1 = listen_loopback(family, sotype); + s1 = socket_loopback(family, sotype); if (s1 < 0) return; - s2 = listen_loopback(family, sotype); + s2 = socket_loopback(family, sotype); if (s2 < 0) goto close_s1; @@ -500,7 +503,7 @@ static void test_destroy_orphan_child(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -534,7 +537,7 @@ static void test_clone_after_delete(int family, int sotype, int mapfd) u64 value; u32 key; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s < 0) return; @@ -570,7 +573,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s == -1) return; @@ -624,7 +627,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = listen_loopback(family, sotype); + s = socket_loopback(family, sotype); if (s == -1) return; @@ -735,7 +738,7 @@ static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) int err, s; u64 value; - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -877,7 +880,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -1009,7 +1012,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = listen_loopback(family, sotype | SOCK_NONBLOCK); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s < 0) return; @@ -1120,7 +1123,7 @@ static void test_reuseport_select_listening(int family, int sotype, zero_verdict_count(verd_map); - s = listen_loopback_reuseport(family, sotype, reuseport_prog); + s = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s < 0) return; @@ -1174,7 +1177,7 @@ static void test_reuseport_select_connected(int family, int sotype, zero_verdict_count(verd_map); - s = listen_loopback_reuseport(family, sotype, reuseport_prog); + s = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s < 0) return; @@ -1249,11 +1252,11 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, zero_verdict_count(verd_map); /* Create two listeners, each in its own reuseport group */ - s1 = listen_loopback_reuseport(family, sotype, reuseport_prog); + s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s1 < 0) return; - s2 = listen_loopback_reuseport(family, sotype, reuseport_prog); + s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); if (s2 < 0) goto close_srv1; -- cgit v1.2.3 From 84be2113e6a7f781bd37c0fd0159899150fdcdfb Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:41 +0000 Subject: selftests: bpf: Add tests for UDP sockets in sockmap Expand the TCP sockmap test suite to also check UDP sockets. Signed-off-by: Jakub Sitnicki Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-11-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 157 +++++++++++++++++---- 1 file changed, 127 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 4ba41dd26d6b..52aa468bdccd 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -108,6 +108,22 @@ __ret; \ }) +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + #define xsocket(family, sotype, flags) \ ({ \ int __ret = socket(family, sotype, flags); \ @@ -330,7 +346,7 @@ close: xclose(s); } -static void test_insert_listening(int family, int sotype, int mapfd) +static void test_insert(int family, int sotype, int mapfd) { u64 value; u32 key; @@ -467,7 +483,7 @@ close: xclose(s); } -static void test_update_listening(int family, int sotype, int mapfd) +static void test_update_existing(int family, int sotype, int mapfd) { int s1, s2; u64 value; @@ -1116,7 +1132,7 @@ static void test_reuseport_select_listening(int family, int sotype, { struct sockaddr_storage addr; unsigned int pass; - int s, c, p, err; + int s, c, err; socklen_t len; u64 value; u32 key; @@ -1145,19 +1161,33 @@ static void test_reuseport_select_listening(int family, int sotype, if (err) goto close_cli; - p = xaccept(s, NULL, NULL); - if (p < 0) - goto close_cli; + if (sotype == SOCK_STREAM) { + int p; + + p = xaccept(s, NULL, NULL); + if (p < 0) + goto close_cli; + xclose(p); + } else { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = xrecv(s, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + } key = SK_PASS; err = xbpf_map_lookup_elem(verd_map, &key, &pass); if (err) - goto close_peer; + goto close_cli; if (pass != 1) FAIL("want pass count 1, have %d", pass); -close_peer: - xclose(p); close_cli: xclose(c); close_srv: @@ -1201,9 +1231,24 @@ static void test_reuseport_select_connected(int family, int sotype, if (err) goto close_cli0; - p0 = xaccept(s, NULL, NULL); - if (err) - goto close_cli0; + if (sotype == SOCK_STREAM) { + p0 = xaccept(s, NULL, NULL); + if (p0 < 0) + goto close_cli0; + } else { + p0 = xsocket(family, sotype, 0); + if (p0 < 0) + goto close_cli0; + + len = sizeof(addr); + err = xgetsockname(c0, sockaddr(&addr), &len); + if (err) + goto close_cli0; + + err = xconnect(p0, sockaddr(&addr), len); + if (err) + goto close_cli0; + } /* Update sock_map[0] to redirect to a connected socket */ key = 0; @@ -1216,8 +1261,24 @@ static void test_reuseport_select_connected(int family, int sotype, if (c1 < 0) goto close_peer0; + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + errno = 0; err = connect(c1, sockaddr(&addr), len); + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c1, &b, sizeof(b), 0); + if (n == -1) + goto close_cli1; + + n = recv(c1, &b, sizeof(b), 0); + err = n == -1; + } if (!err || errno != ECONNREFUSED) FAIL_ERRNO("connect: expected ECONNREFUSED"); @@ -1281,7 +1342,18 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, goto close_srv2; err = connect(c, sockaddr(&addr), len); - if (err && errno != ECONNREFUSED) { + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = recv(c, &b, sizeof(b), 0); + err = n == -1; + } + if (!err || errno != ECONNREFUSED) { FAIL_ERRNO("connect: expected ECONNREFUSED"); goto close_cli; } @@ -1302,9 +1374,9 @@ close_srv1: xclose(s1); } -#define TEST(fn) \ +#define TEST(fn, ...) \ { \ - fn, #fn \ + fn, #fn, __VA_ARGS__ \ } static void test_ops_cleanup(const struct bpf_map *map) @@ -1353,18 +1425,31 @@ static const char *map_type_str(const struct bpf_map *map) } } +static const char *sotype_str(int sotype) +{ + switch (sotype) { + case SOCK_DGRAM: + return "UDP"; + case SOCK_STREAM: + return "TCP"; + default: + return "unknown"; + } +} + static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { const struct op_test { void (*fn)(int family, int sotype, int mapfd); const char *name; + int sotype; } tests[] = { /* insert */ TEST(test_insert_invalid), TEST(test_insert_opened), - TEST(test_insert_bound), - TEST(test_insert_listening), + TEST(test_insert_bound, SOCK_STREAM), + TEST(test_insert), /* delete */ TEST(test_delete_after_insert), TEST(test_delete_after_close), @@ -1373,28 +1458,32 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, TEST(test_lookup_after_delete), TEST(test_lookup_32_bit_value), /* update */ - TEST(test_update_listening), + TEST(test_update_existing), /* races with insert/delete */ - TEST(test_destroy_orphan_child), - TEST(test_syn_recv_insert_delete), - TEST(test_race_insert_listen), + TEST(test_destroy_orphan_child, SOCK_STREAM), + TEST(test_syn_recv_insert_delete, SOCK_STREAM), + TEST(test_race_insert_listen, SOCK_STREAM), /* child clone */ - TEST(test_clone_after_delete), - TEST(test_accept_after_delete), - TEST(test_accept_before_delete), + TEST(test_clone_after_delete, SOCK_STREAM), + TEST(test_accept_after_delete, SOCK_STREAM), + TEST(test_accept_before_delete, SOCK_STREAM), }; - const char *family_name, *map_name; + const char *family_name, *map_name, *sotype_name; const struct op_test *t; char s[MAX_TEST_NAME]; int map_fd; family_name = family_str(family); map_name = map_type_str(map); + sotype_name = sotype_str(sotype); map_fd = bpf_map__fd(map); for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, - t->name); + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; if (!test__start_subtest(s)) continue; @@ -1427,6 +1516,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, t->name); + if (!test__start_subtest(s)) continue; @@ -1441,26 +1531,31 @@ static void test_reuseport(struct test_sockmap_listen *skel, void (*fn)(int family, int sotype, int socket_map, int verdict_map, int reuseport_prog); const char *name; + int sotype; } tests[] = { TEST(test_reuseport_select_listening), TEST(test_reuseport_select_connected), TEST(test_reuseport_mixed_groups), }; int socket_map, verdict_map, reuseport_prog; - const char *family_name, *map_name; + const char *family_name, *map_name, *sotype_name; const struct reuseport_test *t; char s[MAX_TEST_NAME]; family_name = family_str(family); map_name = map_type_str(map); + sotype_name = sotype_str(sotype); socket_map = bpf_map__fd(map); verdict_map = bpf_map__fd(skel->maps.verdict_map); reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, - t->name); + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; if (!test__start_subtest(s)) continue; @@ -1473,8 +1568,10 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { test_ops(skel, map, family, SOCK_STREAM); + test_ops(skel, map, family, SOCK_DGRAM); test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); + test_reuseport(skel, map, family, SOCK_DGRAM); } void test_sockmap_listen(void) -- cgit v1.2.3 From 1f441b35ea5453e1dcc00fac03dbd5d7e6cd4f97 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:42 +0000 Subject: selftests: bpf: Enable UDP sockmap reuseport tests Remove the guard that disables UDP tests now that sockmap has support for them. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-12-lmb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index a1dd13b34d4b..821b4146b7b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -805,12 +805,6 @@ static void test_config(int sotype, sa_family_t family, bool inany) char s[MAX_TEST_NAME]; const struct test *t; - /* SOCKMAP/SOCKHASH don't support UDP yet */ - if (sotype == SOCK_DGRAM && - (inner_map_type == BPF_MAP_TYPE_SOCKMAP || - inner_map_type == BPF_MAP_TYPE_SOCKHASH)) - return; - for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { if (t->need_sotype && t->need_sotype != sotype) continue; /* test not compatible with socket type */ -- cgit v1.2.3 From 47c09d6a9f6794caface4ad50930460b82d7c670 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 9 Mar 2020 10:32:15 -0700 Subject: bpftool: Introduce "prog profile" command With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com --- tools/bpf/bpftool/Makefile | 18 ++ tools/bpf/bpftool/prog.c | 424 +++++++++++++++++++++++++++++- tools/bpf/bpftool/skeleton/profiler.bpf.c | 119 +++++++++ tools/bpf/bpftool/skeleton/profiler.h | 47 ++++ tools/scripts/Makefile.include | 1 + 5 files changed, 608 insertions(+), 1 deletion(-) create mode 100644 tools/bpf/bpftool/skeleton/profiler.bpf.c create mode 100644 tools/bpf/bpftool/skeleton/profiler.h (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c4e810335810..20a90d8450f8 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -59,6 +59,7 @@ LIBS = $(LIBBPF) -lelf -lz INSTALL ?= install RM ?= rm -f +CLANG ?= clang FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib @@ -110,6 +111,22 @@ SRCS += $(BFD_SRCS) endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o + +$(OUTPUT)_prog.o: prog.c + $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< + +$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) + +skeleton/profiler.bpf.o: skeleton/profiler.bpf.c + $(QUIET_CLANG)$(CLANG) -I$(srctree)/tools/lib -g -O2 -target bpf -c $< -o $@ + +profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o + $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ + +$(OUTPUT)prog.o: prog.c profiler.skel.h + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -125,6 +142,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1996e67a2f00..576ddd82bc96 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include @@ -11,10 +12,13 @@ #include #include #include +#include #include #include +#include #include +#include #include #include @@ -1537,6 +1541,421 @@ static int do_loadall(int argc, char **argv) return load_with_options(argc, argv, false); } +#ifdef BPFTOOL_WITHOUT_SKELETONS + +static int do_profile(int argc, char **argv) +{ + return 0; +} + +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "profiler.skel.h" + +struct profile_metric { + const char *name; + struct bpf_perf_event_value val; + struct perf_event_attr attr; + bool selected; + + /* calculate ratios like instructions per cycle */ + const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */ + const char *ratio_desc; + const float ratio_mul; +} metrics[] = { + { + .name = "cycles", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .exclude_user = 1, + }, + }, + { + .name = "instructions", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .exclude_user = 1, + }, + .ratio_metric = 1, + .ratio_desc = "insns per cycle", + .ratio_mul = 1.0, + }, + { + .name = "l1d_loads", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .exclude_user = 1, + }, + }, + { + .name = "llc_misses", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .exclude_user = 1 + }, + .ratio_metric = 2, + .ratio_desc = "LLC misses per million insns", + .ratio_mul = 1e6, + }, +}; + +static __u64 profile_total_count; + +#define MAX_NUM_PROFILE_METRICS 4 + +static int profile_parse_metrics(int argc, char **argv) +{ + unsigned int metric_cnt; + int selected_cnt = 0; + unsigned int i; + + metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + + while (argc > 0) { + for (i = 0; i < metric_cnt; i++) { + if (is_prefix(argv[0], metrics[i].name)) { + if (!metrics[i].selected) + selected_cnt++; + metrics[i].selected = true; + break; + } + } + if (i == metric_cnt) { + p_err("unknown metric %s", argv[0]); + return -1; + } + NEXT_ARG(); + } + if (selected_cnt > MAX_NUM_PROFILE_METRICS) { + p_err("too many (%d) metrics, please specify no more than %d metrics at at time", + selected_cnt, MAX_NUM_PROFILE_METRICS); + return -1; + } + return selected_cnt; +} + +static void profile_read_values(struct profiler_bpf *obj) +{ + __u32 m, cpu, num_cpu = obj->rodata->num_cpu; + int reading_map_fd, count_map_fd; + __u64 counts[num_cpu]; + __u32 key = 0; + int err; + + reading_map_fd = bpf_map__fd(obj->maps.accum_readings); + count_map_fd = bpf_map__fd(obj->maps.counts); + if (reading_map_fd < 0 || count_map_fd < 0) { + p_err("failed to get fd for map"); + return; + } + + err = bpf_map_lookup_elem(count_map_fd, &key, counts); + if (err) { + p_err("failed to read count_map: %s", strerror(errno)); + return; + } + + profile_total_count = 0; + for (cpu = 0; cpu < num_cpu; cpu++) + profile_total_count += counts[cpu]; + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value values[num_cpu]; + + if (!metrics[m].selected) + continue; + + err = bpf_map_lookup_elem(reading_map_fd, &key, values); + if (err) { + p_err("failed to read reading_map: %s", + strerror(errno)); + return; + } + for (cpu = 0; cpu < num_cpu; cpu++) { + metrics[m].val.counter += values[cpu].counter; + metrics[m].val.enabled += values[cpu].enabled; + metrics[m].val.running += values[cpu].running; + } + key++; + } +} + +static void profile_print_readings_json(void) +{ + __u32 m; + + jsonw_start_array(json_wtr); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "metric", metrics[m].name); + jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count); + jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter); + jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled); + jsonw_lluint_field(json_wtr, "running", metrics[m].val.running); + + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + +static void profile_print_readings_plain(void) +{ + __u32 m; + + printf("\n%18llu %-20s\n", profile_total_count, "run_cnt"); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value *val = &metrics[m].val; + int r; + + if (!metrics[m].selected) + continue; + printf("%18llu %-20s", val->counter, metrics[m].name); + + r = metrics[m].ratio_metric - 1; + if (r >= 0 && metrics[r].selected && + metrics[r].val.counter > 0) { + printf("# %8.2f %-30s", + val->counter * metrics[m].ratio_mul / + metrics[r].val.counter, + metrics[m].ratio_desc); + } else { + printf("%-41s", ""); + } + + if (val->enabled > val->running) + printf("(%4.2f%%)", + val->running * 100.0 / val->enabled); + printf("\n"); + } +} + +static void profile_print_readings(void) +{ + if (json_output) + profile_print_readings_json(); + else + profile_print_readings_plain(); +} + +static char *profile_target_name(int tgt_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; + const struct btf_type *t; + char *name = NULL; + struct btf *btf; + + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("failed to get info_linear for prog FD %d", tgt_fd); + return NULL; + } + + if (info_linear->info.btf_id == 0 || + btf__get_from_id(info_linear->info.btf_id, &btf)) { + p_err("prog FD %d doesn't have valid btf", tgt_fd); + goto out; + } + + func_info = (struct bpf_func_info *)(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); + if (!t) { + p_err("btf %d doesn't have type %d", + info_linear->info.btf_id, func_info[0].type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); +out: + free(info_linear); + return name; +} + +static struct profiler_bpf *profile_obj; +static int profile_tgt_fd = -1; +static char *profile_tgt_name; +static int *profile_perf_events; +static int profile_perf_event_cnt; + +static void profile_close_perf_events(struct profiler_bpf *obj) +{ + int i; + + for (i = profile_perf_event_cnt - 1; i >= 0; i--) + close(profile_perf_events[i]); + + free(profile_perf_events); + profile_perf_event_cnt = 0; +} + +static int profile_open_perf_events(struct profiler_bpf *obj) +{ + unsigned int cpu, m; + int map_fd, pmu_fd; + + profile_perf_events = calloc( + sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + if (!profile_perf_events) { + p_err("failed to allocate memory for perf_event array: %s", + strerror(errno)); + return -1; + } + map_fd = bpf_map__fd(obj->maps.events); + if (map_fd < 0) { + p_err("failed to get fd for events map"); + return -1; + } + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { + pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, + -1/*pid*/, cpu, -1/*group_fd*/, 0); + if (pmu_fd < 0 || + bpf_map_update_elem(map_fd, &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + p_err("failed to create event %s on cpu %d", + metrics[m].name, cpu); + return -1; + } + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + } + } + return 0; +} + +static void profile_print_and_cleanup(void) +{ + profile_close_perf_events(profile_obj); + profile_read_values(profile_obj); + profile_print_readings(); + profiler_bpf__destroy(profile_obj); + + close(profile_tgt_fd); + free(profile_tgt_name); +} + +static void int_exit(int signo) +{ + profile_print_and_cleanup(); + exit(0); +} + +static int do_profile(int argc, char **argv) +{ + int num_metric, num_cpu, err = -1; + struct bpf_program *prog; + unsigned long duration; + char *endptr; + + /* we at least need two args for the prog and one metric */ + if (!REQ_ARGS(3)) + return -EINVAL; + + /* parse target fd */ + profile_tgt_fd = prog_parse_fd(&argc, &argv); + if (profile_tgt_fd < 0) { + p_err("failed to parse fd"); + return -1; + } + + /* parse profiling optional duration */ + if (argc > 2 && is_prefix(argv[0], "duration")) { + NEXT_ARG(); + duration = strtoul(*argv, &endptr, 0); + if (*endptr) + usage(); + NEXT_ARG(); + } else { + duration = UINT_MAX; + } + + num_metric = profile_parse_metrics(argc, argv); + if (num_metric <= 0) + goto out; + + num_cpu = libbpf_num_possible_cpus(); + if (num_cpu <= 0) { + p_err("failed to identify number of CPUs"); + goto out; + } + + profile_obj = profiler_bpf__open(); + if (!profile_obj) { + p_err("failed to open and/or load BPF object"); + goto out; + } + + profile_obj->rodata->num_cpu = num_cpu; + profile_obj->rodata->num_metric = num_metric; + + /* adjust map sizes */ + bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); + bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); + bpf_map__resize(profile_obj->maps.accum_readings, num_metric); + bpf_map__resize(profile_obj->maps.counts, 1); + + /* change target name */ + profile_tgt_name = profile_target_name(profile_tgt_fd); + if (!profile_tgt_name) + goto out; + + bpf_object__for_each_program(prog, profile_obj->obj) { + err = bpf_program__set_attach_target(prog, profile_tgt_fd, + profile_tgt_name); + if (err) { + p_err("failed to set attach target\n"); + goto out; + } + } + + set_max_rlimit(); + err = profiler_bpf__load(profile_obj); + if (err) { + p_err("failed to load profile_obj"); + goto out; + } + + err = profile_open_perf_events(profile_obj); + if (err) + goto out; + + err = profiler_bpf__attach(profile_obj); + if (err) { + p_err("failed to attach profile_obj"); + goto out; + } + signal(SIGINT, int_exit); + + sleep(duration); + profile_print_and_cleanup(); + return 0; + +out: + profile_close_perf_events(profile_obj); + if (profile_obj) + profiler_bpf__destroy(profile_obj); + close(profile_tgt_fd); + free(profile_tgt_name); + return err; +} + +#endif /* BPFTOOL_WITHOUT_SKELETONS */ + static int do_help(int argc, char **argv) { if (json_output) { @@ -1560,6 +1979,7 @@ static int do_help(int argc, char **argv) " [data_out FILE [data_size_out L]] \\\n" " [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n" " [repeat N]\n" + " %s %s profile PROG [duration DURATION] METRICs\n" " %s %s tracelog\n" " %s %s help\n" "\n" @@ -1577,12 +1997,13 @@ static int do_help(int argc, char **argv) " struct_ops | fentry | fexit | freplace }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" + " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1599,6 +2020,7 @@ static const struct cmd cmds[] = { { "detach", do_detach }, { "tracelog", do_tracelog }, { "run", do_run }, + { "profile", do_profile }, { 0 } }; diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c new file mode 100644 index 000000000000..20034c12f7c5 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include "profiler.h" +#include +#include +#include + +/* map of perf event fds, num_cpu * num_metric entries */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +/* readings at fentry */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} fentry_readings SEC(".maps"); + +/* accumulated readings */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} accum_readings SEC(".maps"); + +/* sample counts, one per cpu */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u64)); +} counts SEC(".maps"); + +const volatile __u32 num_cpu = 1; +const volatile __u32 num_metric = 1; +#define MAX_NUM_MATRICS 4 + +SEC("fentry/XXX") +int BPF_PROG(fentry_XXX) +{ + struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS]; + u32 key = bpf_get_smp_processor_id(); + u32 i; + + /* look up before reading, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + u32 flag = i; + + ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); + if (!ptrs[i]) + return 0; + } + + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + struct bpf_perf_event_value reading; + int err; + + err = bpf_perf_event_read_value(&events, key, &reading, + sizeof(reading)); + if (err) + return 0; + *(ptrs[i]) = reading; + key += num_cpu; + } + + return 0; +} + +static inline void +fexit_update_maps(u32 id, struct bpf_perf_event_value *after) +{ + struct bpf_perf_event_value *before, diff, *accum; + + before = bpf_map_lookup_elem(&fentry_readings, &id); + /* only account samples with a valid fentry_reading */ + if (before && before->counter) { + struct bpf_perf_event_value *accum; + + diff.counter = after->counter - before->counter; + diff.enabled = after->enabled - before->enabled; + diff.running = after->running - before->running; + + accum = bpf_map_lookup_elem(&accum_readings, &id); + if (accum) { + accum->counter += diff.counter; + accum->enabled += diff.enabled; + accum->running += diff.running; + } + } +} + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; + u32 cpu = bpf_get_smp_processor_id(); + u32 i, one = 1, zero = 0; + int err; + u64 *count; + + /* read all events before updating the maps, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, + readings + i, sizeof(*readings)); + if (err) + return 0; + } + count = bpf_map_lookup_elem(&counts, &zero); + if (count) { + *count += 1; + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + fexit_update_maps(i, &readings[i]); + } + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h new file mode 100644 index 000000000000..e03b53eae767 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __PROFILER_H +#define __PROFILER_H + +/* useful typedefs from vmlinux.h */ + +typedef signed char __s8; +typedef unsigned char __u8; +typedef short int __s16; +typedef short unsigned int __u16; +typedef int __s32; +typedef unsigned int __u32; +typedef long long int __s64; +typedef long long unsigned int __u64; + +typedef __s8 s8; +typedef __u8 u8; +typedef __s16 s16; +typedef __u16 u16; +typedef __s32 s32; +typedef __u32 u32; +typedef __s64 s64; +typedef __u64 u64; + +enum { + false = 0, + true = 1, +}; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#define __bitwise __bitwise__ + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#endif /* __PROFILER_H */ diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ded7a950dc40..59f31f01cb93 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -106,6 +106,7 @@ ifneq ($(silent),1) ifneq ($(V),1) QUIET_CC = @echo ' CC '$@; QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; QUIET_AR = @echo ' AR '$@; QUIET_LINK = @echo ' LINK '$@; QUIET_MKDIR = @echo ' MKDIR '$@; -- cgit v1.2.3 From 319c7c1f6b78a04e72d32336787912cc1b284f25 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 9 Mar 2020 10:32:16 -0700 Subject: bpftool: Documentation for bpftool prog profile Add documentation for the new bpftool prog profile command. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200309173218.2739965-3-songliubraving@fb.com --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 46862e85fed2..9f19404f470e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -30,6 +30,7 @@ PROG COMMANDS | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** | **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] +| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -48,6 +49,9 @@ PROG COMMANDS | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** | } +| *METRIC* := { +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** +| } DESCRIPTION @@ -189,6 +193,12 @@ DESCRIPTION not all of them can take the **ctx_in**/**ctx_out** arguments. bpftool does not perform checks on program types. + **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs* + Profile *METRICs* for bpf program *PROG* for *DURATION* + seconds or until user hits Ctrl-C. *DURATION* is optional. + If *DURATION* is not specified, the profiling will run up to + UINT_MAX seconds. + **bpftool prog help** Print short help message. @@ -311,6 +321,15 @@ EXAMPLES **# rm /sys/fs/bpf/xdp1** +| +| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses** + +:: + 51397 run_cnt + 40176203 cycles (83.05%) + 42518139 instructions # 1.06 insns per cycle (83.39%) + 123 llc_misses # 2.89 LLC misses per million insns (83.15%) + SEE ALSO ======== **bpf**\ (2), -- cgit v1.2.3 From 397692eab35cbbd83681880c6a2dbcdb9fd84386 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 9 Mar 2020 10:32:17 -0700 Subject: bpftool: Bash completion for "bpftool prog profile" Add bash completion for "bpftool prog profile" command. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200309173218.2739965-4-songliubraving@fb.com --- tools/bpf/bpftool/bash-completion/bpftool | 45 ++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index f2838a658339..49f4ab2f67e3 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -337,6 +337,7 @@ _bpftool() local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' + local METRIC_TYPE='cycles instructions l1d_loads llc_misses' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -498,6 +499,48 @@ _bpftool() tracelog) return 0 ;; + profile) + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + name) + _bpftool_get_prog_names + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) ) + return 0 + ;; + 6) + case $prev in + duration) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + ;; run) if [[ ${#words[@]} -lt 5 ]]; then _filedir @@ -525,7 +568,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach \ - load loadall show list tracelog run' -- "$cur" ) ) + load loadall show list tracelog run profile' -- "$cur" ) ) ;; esac ;; -- cgit v1.2.3 From aad32f4c76a22fa0417db083a8cbf9222d4f3d9a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 9 Mar 2020 10:32:18 -0700 Subject: bpftool: Fix typo in bash-completion _bpftool_get_map_names => _bpftool_get_prog_names for prog-attach|detach. Fixes: 99f9863a0c45 ("bpftool: Match maps by name") Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200309173218.2739965-5-songliubraving@fb.com --- tools/bpf/bpftool/bash-completion/bpftool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 49f4ab2f67e3..a9cce9d3745a 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -389,7 +389,7 @@ _bpftool() _bpftool_get_prog_ids ;; name) - _bpftool_get_map_names + _bpftool_get_prog_names ;; pinned) _filedir -- cgit v1.2.3 From 42bbabed09ce6208026648a71a45b4394c74585a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:00 -0800 Subject: perf tools: Add hw_idx in struct branch_stack The low level index of raw branch records for the most recent branch can be recorded in a sample with PERF_SAMPLE_BRANCH_HW_INDEX branch_sample_type. Extend struct branch_stack to support it. However, if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, only nr and entries[] will be output by kernel. The pointer of entries[] could be wrong, since the output format is different with new struct branch_stack. Add a variable no_hw_idx in struct perf_sample to indicate whether the hw_idx is output. Add get_branch_entry() to return corresponding pointer of entries[0]. To make dummy branch sample consistent as new branch sample, add hw_idx in struct dummy_branch_stack for cs-etm and intel-pt. Apply the new struct branch_stack for synthetic events as well. Extend test case sample-parsing to support new struct branch_stack. Committer notes: Renamed get_branch_entries() to perf_sample__branch_entries() to have proper namespacing and pave the way for this to be moved to libperf, eventually. Add 'static' to that inline as it is in a header. Add 'hw_idx' to 'struct dummy_branch_stack' in cs-etm.c to fix the build on arm64. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 70 ++++++++++++---------- tools/perf/tests/sample-parsing.c | 7 ++- tools/perf/util/branch.h | 22 +++++++ tools/perf/util/cs-etm.c | 2 + tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 5 ++ tools/perf/util/evsel.h | 5 ++ tools/perf/util/hist.c | 3 +- tools/perf/util/intel-pt.c | 2 + tools/perf/util/machine.c | 35 +++++------ .../util/scripting-engines/trace-event-python.c | 30 +++++----- tools/perf/util/session.c | 8 ++- tools/perf/util/synthetic-events.c | 6 +- 13 files changed, 125 insertions(+), 71 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e2406b291c1c..656b347f6dd8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, return 0; for (i = 0; i < br->nr; i++) { - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); @@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; thread__find_symbol_fb(thread, sample->cpumode, from, &alf); thread__find_symbol_fb(thread, sample->cpumode, to, &alt); @@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && !alf.map->dso->adjust_symbols) @@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str(br->entries + i), - br->entries[i].flags.in_tx ? 'X' : '-', - br->entries[i].flags.abort ? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, struct machine *machine, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 start, end; int i, insn, len, nr, ilen, printed = 0; struct perf_insn x; @@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += fprintf(fp, "%c", '\n'); /* Handle first from jump, of which we don't know the entry. */ - len = grab_bb(buffer, br->entries[nr-1].from, - br->entries[nr-1].from, + len = grab_bb(buffer, entries[nr-1].from, + entries[nr-1].from, machine, thread, &x.is64bit, &x.cpumode, false); if (len > 0) { - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, + printed += ip__fprintf_sym(entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } /* Print all blocks */ for (i = nr - 2; i >= 0; i--) { - if (br->entries[i].from || br->entries[i].to) + if (entries[i].from || entries[i].to) pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, - br->entries[i].from, - br->entries[i].to); - start = br->entries[i + 1].to; - end = br->entries[i].from; + entries[i].from, + entries[i].to); + start = entries[i + 1].to; + end = entries[i].from; len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); /* Patch up missing kernel transfers due to ring filters */ if (len == -ENXIO && i > 0) { - end = br->entries[--i].from; + end = entries[--i].from; pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); } @@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp, + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); @@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * Hit the branch? In this case we are already done, and the target * has not been executed yet. */ - if (br->entries[0].from == sample->ip) + if (entries[0].from == sample->ip) goto out; - if (br->entries[0].flags.abort) + if (entries[0].flags.abort) goto out; /* @@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * between final branch and sample. When this happens just * continue walking after the last TO until we hit a branch. */ - start = br->entries[0].to; + start = entries[0].to; end = sample->ip; if (end < start) { /* Missing jump. Scan 128 bytes for the next branch */ diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2762e1155238..14239e472187 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); + COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) MCOMP(branch_stack->entries[i]); } @@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 data[64]; } branch_stack = { /* 1 branch_entry */ - .data = {1, 211, 212, 213}, + .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; @@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .transaction = 112, .raw_data = (void *)raw_data, .callchain = &callchain.callchain, + .no_hw_idx = false, .branch_stack = &branch_stack.branch_stack, .user_regs = { .abi = PERF_SAMPLE_REGS_ABI_64, @@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) if (sample_type & PERF_SAMPLE_REGS_INTR) evsel.core.attr.sample_regs_intr = sample_regs; + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + for (i = 0; i < sizeof(regs); i++) *(i + (u8 *)regs) = i & 0xfe; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 88e00d268f6f..154a05cd03af 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -12,6 +12,7 @@ #include #include #include +#include "event.h" struct branch_flags { u64 mispred:1; @@ -39,9 +40,30 @@ struct branch_entry { struct branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries[0]; }; +/* + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. + * Otherwise, the output format of a sample with branch stack is + * struct branch_stack { + * u64 nr; + * struct branch_entry entries[0]; + * } + * Check whether the hw_idx is available, + * and return the corresponding pointer of entries[0]. + */ +static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) +{ + u64 *entry = (u64 *)sample->branch_stack; + + entry++; + if (sample->no_hw_idx) + return (struct branch_entry *)entry; + return (struct branch_entry *)(++entry); +} + struct branch_type_stat { bool branch_to; u64 counts[PERF_BR_MAX]; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5471045ebf5c..b3b3fe3ea345 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1172,6 +1172,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; u64 ip; @@ -1202,6 +1203,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, if (etm->synth_opts.last_branch) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 85223159737c..3cda40a2fafc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -139,6 +139,7 @@ struct perf_sample { u16 insn_len; u8 cpumode; u16 misc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c8dc4450884c..05883a45de5b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2169,7 +2169,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (data->branch_stack->nr > max_branch_nr) return -EFAULT; + sz = data->branch_stack->nr * sizeof(struct branch_entry); + if (perf_evsel__has_branch_hw_idx(evsel)) + sz += sizeof(u64); + else + data->no_hw_idx = true; OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dc14f4a823cd..99a0cb60c556 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -389,6 +389,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +{ + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + static inline bool evsel__has_callchain(const struct evsel *evsel) { return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index ca5a8f4d007e..e74a5acf66d9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, u64 *total_cycles) { struct branch_info *bi; + struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. */ - if (bs && bs->nr && bs->entries[0].flags.cycles) { + if (bs && bs->nr && entries[0].flags.cycles) { int i; bi = sample__resolve_bstack(sample, al); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 33cf8928cf05..23c8289c2472 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb5c2cd44d30..fd14f1489802 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2081,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, { unsigned int i; const struct branch_stack *bs = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); - bi[i].flags = bs->entries[i].flags; + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); + bi[i].flags = entries[i].flags; } return bi; } @@ -2185,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); int lbr_nr = lbr_stack->nr, j, k; bool branch; struct branch_flags *flags; @@ -2210,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = chain->ips[j]; else if (j > i + 1) { k = j - i - 2; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } else { if (j < lbr_nr) { k = lbr_nr - j - 1; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } @@ -2281,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int max_stack) { struct branch_stack *branch = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2328,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { if (callchain_param.order == ORDER_CALLEE) { - be[i] = branch->entries[i]; + be[i] = entries[i]; if (chain == NULL) continue; @@ -2347,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i].from >= chain->ips[first_call] - 8) first_call++; } else - be[i] = branch->entries[branch->nr - i - 1]; + be[i] = entries[branch->nr - i - 1]; } memset(iter, 0, sizeof(struct iterations) * nr); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 80ca5d0ab7fe..8c1b27cd8b99 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; @@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); pydict_set_item_string_decref(pyelem, "from", - PyLong_FromUnsignedLongLong(br->entries[i].from)); + PyLong_FromUnsignedLongLong(entries[i].from)); pydict_set_item_string_decref(pyelem, "to", - PyLong_FromUnsignedLongLong(br->entries[i].to)); + PyLong_FromUnsignedLongLong(entries[i].to)); pydict_set_item_string_decref(pyelem, "mispred", - PyBool_FromLong(br->entries[i].flags.mispred)); + PyBool_FromLong(entries[i].flags.mispred)); pydict_set_item_string_decref(pyelem, "predicted", - PyBool_FromLong(br->entries[i].flags.predicted)); + PyBool_FromLong(entries[i].flags.predicted)); pydict_set_item_string_decref(pyelem, "in_tx", - PyBool_FromLong(br->entries[i].flags.in_tx)); + PyBool_FromLong(entries[i].flags.in_tx)); pydict_set_item_string_decref(pyelem, "abort", - PyBool_FromLong(br->entries[i].flags.abort)); + PyBool_FromLong(entries[i].flags.abort)); pydict_set_item_string_decref(pyelem, "cycles", - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; char bf[512]; @@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "to", _PyUnicode_FromString(bf)); - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "pred", _PyUnicode_FromString(bf)); - if (br->entries[i].flags.in_tx) { + if (entries[i].flags.in_tx) { pydict_set_item_string_decref(pyelem, "in_tx", _PyUnicode_FromString("X")); } else { @@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, _PyUnicode_FromString("-")); } - if (br->entries[i].flags.abort) { + if (entries[i].flags.abort) { pydict_set_item_string_decref(pyelem, "abort", _PyUnicode_FromString("A")); } else { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d0d7d25b23e3..055b00abd56d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) { struct ip_callchain *callchain = sample->callchain; struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 kernel_callchain_nr = callchain->nr; unsigned int i; @@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) i, callchain->ips[i]); printf("..... %2d: %016" PRIx64 "\n", - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + (int)(kernel_callchain_nr), entries[0].to); for (i = 0; i < lbr_stack->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + (int)(i + kernel_callchain_nr + 1), entries[i].from); } } @@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel, static void branch_stack__printf(struct perf_sample *sample, bool callstack) { + struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; printf("%s: nr:%" PRIu64 "\n", @@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) sample->branch_stack->nr); for (i = 0; i < sample->branch_stack->nr; i++) { - struct branch_entry *e = &sample->branch_stack->entries[i]; + struct branch_entry *e = &entries[i]; if (!callstack) { printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index c423298fe62d..dd3e6f43fb86 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1183,7 +1183,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); result += sz; } @@ -1344,7 +1345,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); memcpy(array, sample->branch_stack, sz); array = (void *)array + sz; } -- cgit v1.2.3 From d3f85437ad6a55113882d730beaa75759452da8f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:01 -0800 Subject: perf evsel: Support PERF_SAMPLE_BRANCH_HW_INDEX A new branch sample type PERF_SAMPLE_BRANCH_HW_INDEX has been introduced in latest kernel. Enable HW_INDEX by default in LBR call stack mode. If kernel doesn't support the sample type, switching it off. Add HW_INDEX in attr_fprintf as well. User can check whether the branch sample type is set via debug information or header. Committer testing: First collect some samples with LBR callchains, system wide, for a few seconds: # perf record --call-graph lbr -a sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.625 MB perf.data (224 samples) ] # Now lets use 'perf evlist -v' to look at the branch_sample_type: # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES|HW_INDEX # So the machine has the kernel feature, and it was correctly added to perf_event_attr.branch_sample_type, for the default 'cycles' event. If we do it in another machine, where the kernel lacks the HW_INDEX feature, we get: # perf record --call-graph lbr -a sleep 2s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.690 MB perf.data (499 samples) ] # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES # No HW_INDEX in attr.branch_sample_type. Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 15 ++++++++++++--- tools/perf/util/evsel.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 05883a45de5b..816d930d774e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS; + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_HW_INDEX; } } else pr_warning("Cannot use LBR callstack with branch stack. " @@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel, if (param->record_mode == CALLCHAIN_LBR) { perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | - PERF_SAMPLE_BRANCH_CALL_STACK); + PERF_SAMPLE_BRANCH_CALL_STACK | + PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { perf_evsel__reset_sample_bit(evsel, REGS_USER); @@ -1673,6 +1675,8 @@ fallback_missing_features: evsel->core.attr.ksymbol = 0; if (perf_missing_features.bpf) evsel->core.attr.bpf_event = 0; + if (perf_missing_features.branch_hw_idx) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->core.attr.sample_id_all = 0; @@ -1784,7 +1788,12 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { + if (!perf_missing_features.branch_hw_idx && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 99a0cb60c556..33804740e2ca 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -119,6 +119,7 @@ struct perf_missing_features { bool ksymbol; bool bpf; bool aux_output; + bool branch_hw_idx; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 651203126c71..355d3458d4e6 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name -- cgit v1.2.3 From 277ce1efa7b504873cd32a4106654836c2f80e1b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:02 -0800 Subject: perf header: Add check for unexpected use of reserved membrs in event attr The perf.data may be generated by a newer version of perf tool, which support new input bits in attr, e.g. new bit for branch_sample_type. The perf.data may be parsed by an older version of perf tool later. The old perf tool may parse the perf.data incorrectly. There is no warning message for this case. Current perf header never check for unknown input bits in attr. When read the event desc from header, check the stored event attr. The reserved bits, sample type, read format and branch sample type will be checked. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lkml.kernel.org/r/20200228163011.19358-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4246e7447e54..acbd046bf95c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events) free(events); } +static bool perf_attr_check(struct perf_event_attr *attr) +{ + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) { + pr_warning("Reserved bits are set unexpectedly. " + "Please update perf tool.\n"); + return false; + } + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) { + pr_warning("Unknown sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->sample_type); + return false; + } + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) { + pr_warning("Unknown read format (0x%llx) is detected. " + "Please update perf tool.\n", + attr->read_format); + return false; + } + + if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) && + (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) { + pr_warning("Unknown branch sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->branch_sample_type); + + return false; + } + + return true; +} + static struct evsel *read_event_desc(struct feat_fd *ff) { struct evsel *evsel, *events = NULL; @@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff) memcpy(&evsel->core.attr, buf, msz); + if (!perf_attr_check(&evsel->core.attr)) + goto error; + if (do_read_u32(ff, &nr)) goto error; -- cgit v1.2.3 From 576a65b6974ddc830a89b6feb6823bd6b5914bde Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:12 +0100 Subject: perf expr: Add expr.c object Add generic expr code into new expr.c object. The expr.c object will be mainly used in following change that will get rid of the manual flex code, Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/expr.c | 19 +++++++++++++++++++ tools/perf/util/expr.y | 16 ---------------- 3 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 tools/perf/util/expr.c (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 07da6c790b63..6fdf073093a6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -122,6 +122,7 @@ perf-y += vsprintf.o perf-y += units.o perf-y += time-utils.o perf-y += expr-bison.o +perf-y += expr.o perf-y += branch.o perf-y += mem2node.o diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c new file mode 100644 index 000000000000..816b23b2068a --- /dev/null +++ b/tools/perf/util/expr.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "expr.h" + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 7d226241f1d7..7cea8b7c3a5c 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -6,7 +6,6 @@ #define IN_EXPR_Y 1 #include "expr.h" #include "smt.h" -#include #include #define MAXIDLEN 256 @@ -169,21 +168,6 @@ static int expr__lex(YYSTYPE *res, const char **pp) return tok; } -/* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) -{ - int idx; - assert(ctx->num_ids < MAX_PARSE_ID); - idx = ctx->num_ids++; - ctx->ids[idx].name = name; - ctx->ids[idx].val = val; -} - -void expr__ctx_init(struct parse_ctx *ctx) -{ - ctx->num_ids = 0; -} - static bool already_seen(const char *val, const char *one, const char **other, int num_other) { -- cgit v1.2.3 From 26226a97724d1671d553b8eb0cd95b0a5557cfb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:13 +0100 Subject: perf expr: Move expr lexer to flex Adding expr flex code instead of the manual parser code. So it's easily extensible in upcoming changes. The new flex code is in flex.l object and gets compiled like all the other flexers we use. It's defined as flex reentrant parser. It's used by both expr__parse and expr__find_other interfaces by separating the starting point. There's no intended change of functionality ;-) the test expr is passing. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 10 ++- tools/perf/util/expr.c | 93 +++++++++++++++++++++++++++ tools/perf/util/expr.h | 2 - tools/perf/util/expr.l | 114 +++++++++++++++++++++++++++++++++ tools/perf/util/expr.y | 169 +++++++++---------------------------------------- 5 files changed, 247 insertions(+), 141 deletions(-) create mode 100644 tools/perf/util/expr.l (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 6fdf073093a6..c0cf8dff694e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -121,6 +121,7 @@ perf-y += mem-events.o perf-y += vsprintf.o perf-y += units.o perf-y += time-utils.o +perf-y += expr-flex.o perf-y += expr-bison.o perf-y += expr.o perf-y += branch.o @@ -190,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l + $(OUTPUT)util/expr-bison.c: util/expr.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) @@ -204,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w +CFLAGS_expr-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c +$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" @@ -217,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls +CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 816b23b2068a..b39fd39f10ec 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include "expr.h" +#include "expr-bison.h" +#define YY_EXTRA_TYPE int +#include "expr-flex.h" + +#ifdef PARSER_DEBUG +extern int expr_debug; +#endif /* Caller must make sure id is allocated */ void expr__add_id(struct parse_ctx *ctx, const char *name, double val) @@ -17,3 +25,88 @@ void expr__ctx_init(struct parse_ctx *ctx) { ctx->num_ids = 0; } + +static int +__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, + int start) +{ + YY_BUFFER_STATE buffer; + void *scanner; + int ret; + + ret = expr_lex_init_extra(start, &scanner); + if (ret) + return ret; + + buffer = expr__scan_string(expr, scanner); + +#ifdef PARSER_DEBUG + expr_debug = 1; +#endif + + ret = expr_parse(val, ctx, scanner); + + expr__flush_buffer(buffer, scanner); + expr__delete_buffer(buffer, scanner); + expr_lex_destroy(scanner); + return ret; +} + +int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp) +{ + return __expr__parse(final_val, ctx, *pp, EXPR_PARSE); +} + +static bool +already_seen(const char *val, const char *one, const char **other, + int num_other) +{ + int i; + + if (one && !strcasecmp(one, val)) + return true; + for (i = 0; i < num_other; i++) + if (!strcasecmp(other[i], val)) + return true; + return false; +} + +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_other) +{ + int err, i = 0, j = 0; + struct parse_ctx ctx; + + expr__ctx_init(&ctx); + err = __expr__parse(NULL, &ctx, p, EXPR_OTHER); + if (err) + return -1; + + *other = malloc((ctx.num_ids + 1) * sizeof(char *)); + if (!*other) + return -ENOMEM; + + for (i = 0, j = 0; i < ctx.num_ids; i++) { + const char *str = ctx.ids[i].name; + + if (already_seen(str, one, *other, j)) + continue; + + str = strdup(str); + if (!str) + goto out; + (*other)[j++] = str; + } + (*other)[j] = NULL; + +out: + if (i != ctx.num_ids) { + while (--j) + free((char *) (*other)[i]); + free(*other); + err = -1; + } + + *num_other = j; + return err; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 046160831f90..9332796e6649 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -17,9 +17,7 @@ struct parse_ctx { void expr__ctx_init(struct parse_ctx *ctx); void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -#ifndef IN_EXPR_Y int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); -#endif int expr__find_other(const char *p, const char *one, const char ***other, int *num_other); diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l new file mode 100644 index 000000000000..1928f2a3dddc --- /dev/null +++ b/tools/perf/util/expr.l @@ -0,0 +1,114 @@ +%option prefix="expr_" +%option reentrant +%option bison-bridge + +%{ +#include +#include "expr.h" +#include "expr-bison.h" + +char *expr_get_text(yyscan_t yyscanner); +YYSTYPE *expr_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token) +{ + u64 num; + + errno = 0; + num = strtoull(str, NULL, base); + if (errno) + return EXPR_ERROR; + + yylval->num = num; + return token; +} + +static int value(yyscan_t scanner, int base) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + return __value(yylval, text, base, NUMBER); +} + +/* + * Allow @ instead of / to be able to specify pmu/event/ without + * conflicts with normal division. + */ +static char *normalize(char *str) +{ + char *ret = str; + char *dst = str; + + while (*str) { + if (*str == '@') + *dst++ = '/'; + else if (*str == '\\') + *dst++ = *++str; + else + *dst++ = *str; + str++; + } + + *dst = 0x0; + return ret; +} + +static int str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + yylval->str = normalize(strdup(text)); + if (!yylval->str) + return EXPR_ERROR; + + yylval->str = normalize(yylval->str); + return token; +} +%} + +number [0-9]+ + +sch [-,=] +spec \\{sch} +sym [0-9a-zA-Z_\.:@]+ +symbol {spec}*{sym}*{spec}*{sym}* + +%% + { + int start_token; + + start_token = parse_events_get_extra(yyscanner); + + if (start_token) { + parse_events_set_extra(NULL, yyscanner); + return start_token; + } + } + +max { return MAX; } +min { return MIN; } +if { return IF; } +else { return ELSE; } +#smt_on { return SMT_ON; } +{number} { return value(yyscanner, 10); } +{symbol} { return str(yyscanner, ID); } +"|" { return '|'; } +"^" { return '^'; } +"&" { return '&'; } +"-" { return '-'; } +"+" { return '+'; } +"*" { return '*'; } +"/" { return '/'; } +"%" { return '%'; } +"(" { return '('; } +")" { return ')'; } +"," { return ','; } +. { } +%% + +int expr_wrap(void *scanner __maybe_unused) +{ + return 1; +} diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 7cea8b7c3a5c..4720cbe79357 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,5 +1,7 @@ /* Simple expression parser */ %{ +#define YYDEBUG 1 +#include #include "util.h" #include "util/debug.h" #include // strtod() @@ -8,23 +10,23 @@ #include "smt.h" #include -#define MAXIDLEN 256 %} %define api.pure full %parse-param { double *final_val } %parse-param { struct parse_ctx *ctx } -%parse-param { const char **pp } -%lex-param { const char **pp } +%parse-param {void *scanner} +%lex-param {void* scanner} %union { - double num; - char id[MAXIDLEN+1]; + double num; + char *str; } +%token EXPR_PARSE EXPR_OTHER EXPR_ERROR %token NUMBER -%token ID +%token ID %token MIN MAX IF ELSE SMT_ON %left MIN MAX IF %left '|' @@ -36,11 +38,9 @@ %type expr if_expr %{ -static int expr__lex(YYSTYPE *res, const char **pp); - -static void expr__error(double *final_val __maybe_unused, +static void expr_error(double *final_val __maybe_unused, struct parse_ctx *ctx __maybe_unused, - const char **pp __maybe_unused, + void *scanner, const char *s) { pr_debug("%s\n", s); @@ -62,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val) %} %% +start: +EXPR_PARSE all_expr +| +EXPR_OTHER all_other + +all_other: all_other other +| + +other: ID +{ + if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { + pr_err("failed: way too many variables"); + YYABORT; + } + + ctx->ids[ctx->num_ids++].name = $1; +} +| +MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' + + all_expr: if_expr { *final_val = $1; } ; @@ -92,131 +113,3 @@ expr: NUMBER ; %% - -static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) -{ - char *dst = res->id; - const char *s = p; - - if (*p == '#') - *dst++ = *p++; - - while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { - if (p - s >= MAXIDLEN) - return -1; - /* - * Allow @ instead of / to be able to specify pmu/event/ without - * conflicts with normal division. - */ - if (*p == '@') - *dst++ = '/'; - else if (*p == '\\') - *dst++ = *++p; - else - *dst++ = *p; - p++; - } - *dst = 0; - *pp = p; - dst = res->id; - switch (dst[0]) { - case 'm': - if (!strcmp(dst, "min")) - return MIN; - if (!strcmp(dst, "max")) - return MAX; - break; - case 'i': - if (!strcmp(dst, "if")) - return IF; - break; - case 'e': - if (!strcmp(dst, "else")) - return ELSE; - break; - case '#': - if (!strcasecmp(dst, "#smt_on")) - return SMT_ON; - break; - } - return ID; -} - -static int expr__lex(YYSTYPE *res, const char **pp) -{ - int tok; - const char *s; - const char *p = *pp; - - while (isspace(*p)) - p++; - s = p; - switch (*p++) { - case '#': - case 'a' ... 'z': - case 'A' ... 'Z': - return expr__symbol(res, p - 1, pp); - case '0' ... '9': case '.': - res->num = strtod(s, (char **)&p); - tok = NUMBER; - break; - default: - tok = *s; - break; - } - *pp = p; - return tok; -} - -static bool already_seen(const char *val, const char *one, const char **other, - int num_other) -{ - int i; - - if (one && !strcasecmp(one, val)) - return true; - for (i = 0; i < num_other; i++) - if (!strcasecmp(other[i], val)) - return true; - return false; -} - -int expr__find_other(const char *p, const char *one, const char ***other, - int *num_otherp) -{ - const char *orig = p; - int err = -1; - int num_other; - - *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); - if (!*other) - return -1; - - num_other = 0; - for (;;) { - YYSTYPE val; - int tok = expr__lex(&val, &p); - if (tok == 0) { - err = 0; - break; - } - if (tok == ID && !already_seen(val.id, one, *other, num_other)) { - if (num_other >= EXPR_MAX_OTHER - 1) { - pr_debug("Too many extra events in %s\n", orig); - break; - } - (*other)[num_other] = strdup(val.id); - if (!(*other)[num_other]) - return -1; - num_other++; - } - } - (*other)[num_other] = NULL; - *num_otherp = num_other; - if (err) { - *num_otherp = 0; - free(*other); - *other = NULL; - } - return err; -} -- cgit v1.2.3 From 58ca707636dc44c8803a098fb68129f64272f151 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:14 +0100 Subject: perf expr: Increase EXPR_MAX_OTHER to support metrics with more than 15 variables We have metrics that define more than 15 variables, like Branch_Misprediction_Cost. Increasing the allowed variables count to 20. As Andy pointed out, we can't go too high in here, because some of the code has O(n^2) complexity (already_seen) and we might want to do some other changes (like using hash tables) before increasing the maximum even more. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9332796e6649..df0a17df0cef 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,7 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#define EXPR_MAX_OTHER 15 +#define EXPR_MAX_OTHER 20 #define MAX_PARSE_ID EXPR_MAX_OTHER struct parse_id { -- cgit v1.2.3 From 0f9b1e124bb29719eb1572db74b7893bd616f938 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:15 +0100 Subject: perf expr: Straighten expr__parse()/expr__find_other() interface Now that we have a flex parser we don't need to update the parsed string pointer, so the interface can just be passed the pointer to the expression instead of a pointer to pointer. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 6 +++--- tools/perf/util/expr.c | 8 ++++---- tools/perf/util/expr.h | 4 ++-- tools/perf/util/stat-shadow.c | 4 +--- 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 87843af4c118..755d73c86c68 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, &e)) + if (expr__parse(&val, ctx, e)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -44,11 +44,11 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) return ret; p = "FOO/0"; - ret = expr__parse(&val, &ctx, &p); + ret = expr__parse(&val, &ctx, p); TEST_ASSERT_VAL("division by zero", ret == 1); p = "BAR/"; - ret = expr__parse(&val, &ctx, &p); + ret = expr__parse(&val, &ctx, p); TEST_ASSERT_VAL("missing operand", ret == 1); TEST_ASSERT_VAL("find other", diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b39fd39f10ec..45b25530db5b 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -52,9 +52,9 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, return ret; } -int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp) +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) { - return __expr__parse(final_val, ctx, *pp, EXPR_PARSE); + return __expr__parse(final_val, ctx, expr, EXPR_PARSE); } static bool @@ -71,14 +71,14 @@ already_seen(const char *val, const char *one, const char **other, return false; } -int expr__find_other(const char *p, const char *one, const char ***other, +int expr__find_other(const char *expr, const char *one, const char ***other, int *num_other) { int err, i = 0, j = 0; struct parse_ctx ctx; expr__ctx_init(&ctx); - err = __expr__parse(NULL, &ctx, p, EXPR_OTHER); + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); if (err) return -1; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index df0a17df0cef..9377538f4097 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -17,8 +17,8 @@ struct parse_ctx { void expr__ctx_init(struct parse_ctx *ctx); void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); -int expr__find_other(const char *p, const char *one, const char ***other, +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); +int expr__find_other(const char *expr, const char *one, const char ***other, int *num_other); #endif diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 90d23cc3c8d4..0fd713d3674f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -777,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - const char *p = metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; -- cgit v1.2.3 From d942815a76463fa53b81d3d1c064f76bb3f80ead Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:16 +0100 Subject: perf expr: Make expr__parse() return -1 on error To match the error value of the expr__find_other function, so all exported expr functions return the same values: 0 on success, -1 on error. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 4 ++-- tools/perf/util/expr.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 755d73c86c68..28313e59d6f6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -45,11 +45,11 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) p = "FOO/0"; ret = expr__parse(&val, &ctx, p); - TEST_ASSERT_VAL("division by zero", ret == 1); + TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; ret = expr__parse(&val, &ctx, p); - TEST_ASSERT_VAL("missing operand", ret == 1); + TEST_ASSERT_VAL("missing operand", ret == -1); TEST_ASSERT_VAL("find other", expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 45b25530db5b..fd192ddf93c1 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -54,7 +54,7 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE); + return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; } static bool -- cgit v1.2.3 From 3e152aa984ff4f639f7d2daf1ad10d408c0a3332 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:52 +0800 Subject: perf block-info: Fix wrong block address comparison in block_info__cmp() Commit 6041441870ab ("perf block: Cleanup and refactor block info functions") introduces block_info__cmp(), which compares two blocks. But the issues are: 1. It should return the strcmp cmp value only if it's not 0. 2. When symbol names are matched, we need to compare the addresses of blocks further. But it wrongly uses the symbol addresses for comparison. 3. If the syms are both NULL, we can't consider these two blocks are matched. This patch fixes above 3 issues. Fixes: 6041441870ab ("perf block: Cleanup and refactor block info functions") Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-info.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index fbbb6d640dad..5b4214656e29 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -74,30 +74,21 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, if (!bi_l->sym || !bi_r->sym) { if (!bi_l->sym && !bi_r->sym) - return 0; + return -1; else if (!bi_l->sym) return -1; else return 1; } - if (bi_l->sym == bi_r->sym) { - if (bi_l->start == bi_r->start) { - if (bi_l->end == bi_r->end) - return 0; - else - return (int64_t)(bi_r->end - bi_l->end); - } else - return (int64_t)(bi_r->start - bi_l->start); - } else { - cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + if (cmp) return cmp; - } - if (bi_l->sym->start != bi_r->sym->start) - return (int64_t)(bi_r->sym->start - bi_l->sym->start); + if (bi_l->start != bi_r->start) + return (int64_t)(bi_r->start - bi_l->start); - return (int64_t)(bi_r->sym->end - bi_l->sym->end); + return (int64_t)(bi_r->end - bi_l->end); } static void init_block_info(struct block_info *bi, struct symbol *sym, -- cgit v1.2.3 From a8a9f6dc0dbfc0f4f225987abec7eb688f4b2d7e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:53 +0800 Subject: perf diff: Use __block_info__cmp() to replace block_pair_cmp() 'perf diff' uses block_pair_cmp() to compare two blocks. But block_info__cmp() has the similar functionality and it's a bit more complete. This patch removes block_pair_cmp() and uses __block_info__cmp() instead. __block_info__cmp() is wrapped by block_info__cmp() and it doesn't receives a perf_hpp_fmt parameter. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 21 ++------------------- tools/perf/util/block-info.c | 9 +++++++-- tools/perf/util/block-info.h | 2 ++ 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c03c36fde7e2..5e697cd2224a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -572,29 +572,12 @@ static void init_block_hist(struct block_hist *bh) bh->valid = true; } -static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) -{ - struct block_info *bi_a = a->block_info; - struct block_info *bi_b = b->block_info; - int cmp; - - if (!bi_a->sym || !bi_b->sym) - return -1; - - cmp = strcmp(bi_a->sym->name, bi_b->sym->name); - - if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) - return 0; - - return -1; -} - static struct hist_entry *get_block_pair(struct hist_entry *he, struct hists *hists_pair) { struct rb_root_cached *root = hists_pair->entries_in; struct rb_node *next = rb_first_cached(root); - int cmp; + int64_t cmp; while (next != NULL) { struct hist_entry *he_pair = rb_entry(next, struct hist_entry, @@ -602,7 +585,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, next = rb_next(&he_pair->rb_node_in); - cmp = block_pair_cmp(he_pair, he); + cmp = __block_info__cmp(he_pair, he); if (!cmp) return he_pair; } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 5b4214656e29..25f6422c548b 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -65,8 +65,7 @@ struct block_info *block_info__new(void) return bi; } -int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, - struct hist_entry *left, struct hist_entry *right) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) { struct block_info *bi_l = left->block_info; struct block_info *bi_r = right->block_info; @@ -91,6 +90,12 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, return (int64_t)(bi_r->end - bi_l->end); } +int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return __block_info__cmp(left, right); +} + static void init_block_info(struct block_info *bi, struct symbol *sym, struct cyc_hist *ch, int offset, u64 total_cycles) diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index bef0d75e9819..a0fa9fefeaf0 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -61,6 +61,8 @@ static inline void __block_info__zput(struct block_info **bi) #define block_info__zput(bi) __block_info__zput(&bi) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); + int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right); -- cgit v1.2.3 From cca0cc76f5f56dff2c8461b551a3e1fdabcd3fba Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:54 +0800 Subject: perf block-info: Allow selecting which columns to report and its order Currently we use a predefined array to set the block info output formats, it's fixed and inflexible. This patch adds two parameters "block_hpps" and "nr_hpps" in block_info__create_report and other static functions, in order to let user decide which columns to report and with specified report ordering. It should be more flexible. Buffers will be allocated to contain the new fmts, of course, we need to release them before perf exits. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 21 +++++++++++++++--- tools/perf/util/block-info.c | 51 ++++++++++++++++++++++++++++++-------------- tools/perf/util/block-info.h | 7 +++++- 3 files changed, 59 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72a12b69f120..d7c905f7520f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -104,6 +104,7 @@ struct report { bool symbol_ipc; bool total_cycles_mode; struct block_report *block_reports; + int nr_block_reports; }; static int report__config(const char *var, const char *value, void *cb) @@ -966,8 +967,19 @@ static int __cmd_report(struct report *rep) report__output_resort(rep); if (rep->total_cycles_mode) { + int block_hpps[6] = { + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_LBR_CYCLES, + PERF_HPP_REPORT__BLOCK_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_AVG_CYCLES, + PERF_HPP_REPORT__BLOCK_RANGE, + PERF_HPP_REPORT__BLOCK_DSO, + }; + rep->block_reports = block_info__create_report(session->evlist, - rep->total_cycles); + rep->total_cycles, + block_hpps, 6, + &rep->nr_block_reports); if (!rep->block_reports) return -1; } @@ -1551,8 +1563,11 @@ error: zfree(&report.ptime_range); } - if (report.block_reports) - zfree(&report.block_reports); + if (report.block_reports) { + block_info__free_report(report.block_reports, + report.nr_block_reports); + report.block_reports = NULL; + } zstd_fini(&(session->zstd_data)); perf_session__delete(session); diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 25f6422c548b..debb8b12cf51 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -373,33 +373,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, } static void register_block_columns(struct perf_hpp_list *hpp_list, - struct block_fmt *block_fmts) + struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) - hpp_register(&block_fmts[i], i, hpp_list); + for (int i = 0; i < nr_hpps; i++) + hpp_register(&block_fmts[i], block_hpps[i], hpp_list); } -static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts) +static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { __hists__init(&bh->block_hists, &bh->block_list); perf_hpp_list__init(&bh->block_list); bh->block_list.nr_header_lines = 1; - register_block_columns(&bh->block_list, block_fmts); + register_block_columns(&bh->block_list, block_fmts, + block_hpps, nr_hpps); - perf_hpp_list__register_sort_field(&bh->block_list, - &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt); + /* Sort by the first fmt */ + perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt); } -static void process_block_report(struct hists *hists, - struct block_report *block_report, - u64 total_cycles) +static int process_block_report(struct hists *hists, + struct block_report *block_report, + u64 total_cycles, int *block_hpps, + int nr_hpps) { struct rb_node *next = rb_first_cached(&hists->entries); struct block_hist *bh = &block_report->hist; struct hist_entry *he; - init_block_hist(bh, block_report->fmts); + if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX) + return -1; + + block_report->nr_fmts = nr_hpps; + init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps); while (next) { he = rb_entry(next, struct hist_entry, rb_node); @@ -408,16 +416,19 @@ static void process_block_report(struct hists *hists, next = rb_next(&he->rb_node); } - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) { + for (int i = 0; i < nr_hpps; i++) { block_report->fmts[i].total_cycles = total_cycles; block_report->fmts[i].block_cycles = block_report->cycles; } hists__output_resort(&bh->block_hists, NULL); + return 0; } struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles) + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps) { struct block_report *block_reports; int nr_hists = evlist->core.nr_entries, i = 0; @@ -430,13 +441,23 @@ struct block_report *block_info__create_report(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - process_block_report(hists, &block_reports[i], total_cycles); + process_block_report(hists, &block_reports[i], total_cycles, + block_hpps, nr_hpps); i++; } + *nr_reps = nr_hists; return block_reports; } +void block_info__free_report(struct block_report *reps, int nr_reps) +{ + for (int i = 0; i < nr_reps; i++) + hists__delete_entries(&reps[i].hist.block_hists); + + free(reps); +} + int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, struct annotation_options *annotation_opts) @@ -448,13 +469,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, symbol_conf.report_individual_block = true; hists__fprintf(&bh->block_hists, true, 0, 0, min_percent, stdout, true); - hists__delete_entries(&bh->block_hists); return 0; case 1: symbol_conf.report_individual_block = true; ret = block_hists_tui_browse(bh, evsel, min_percent, env, annotation_opts); - hists__delete_entries(&bh->block_hists); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index a0fa9fefeaf0..42e9dcc4cf0a 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -45,6 +45,7 @@ struct block_report { struct block_hist hist; u64 cycles; struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX]; + int nr_fmts; }; struct block_hist; @@ -70,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, u64 *block_cycles_aggr, u64 total_cycles); struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles); + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps); + +void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, -- cgit v1.2.3 From f787feff69c466dfc6f261c9632627e383b49187 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:55 +0800 Subject: perf block-info: Support color ops to print block percents in color It would be nice to print the block percents with colors. This patch supports the 'Sampled Cycles%' and 'Avg Cycles%' printed in colors. For example, perf record -b ... perf report --total-cycles or perf report --total-cycles --stdio percent > 5%, colored in red percent > 0.5%, colored in green percent < 0.5%, default color Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-info.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index debb8b12cf51..423ec69bda6c 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -181,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt, return block_fmt->width; } +static int color_pct(struct perf_hpp *hpp, int width, double pct) +{ +#ifdef HAVE_SLANG_SUPPORT + if (use_browser) { + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", + width - 1, pct); + } +#endif + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct); +} + static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) @@ -188,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); struct block_info *bi = he->block_info; double ratio = 0.0; - char buf[16]; if (block_fmt->total_cycles) ratio = (double)bi->cycles / (double)block_fmt->total_cycles; - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, @@ -248,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_info *bi = he->block_info; double ratio = 0.0; u64 avg; - char buf[16]; if (block_fmt->block_cycles && bi->num_aggr) { avg = bi->cycles_aggr / bi->num_aggr; ratio = (double)avg / (double)block_fmt->block_cycles; } - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt, @@ -345,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, switch (idx) { case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT: - fmt->entry = block_total_cycles_pct_entry; + fmt->color = block_total_cycles_pct_entry; fmt->cmp = block_info__cmp; fmt->sort = block_total_cycles_pct_sort; break; @@ -353,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, fmt->entry = block_cycles_lbr_entry; break; case PERF_HPP_REPORT__BLOCK_CYCLES_PCT: - fmt->entry = block_cycles_pct_entry; + fmt->color = block_cycles_pct_entry; break; case PERF_HPP_REPORT__BLOCK_AVG_CYCLES: fmt->entry = block_avg_cycles_entry; -- cgit v1.2.3 From 8a4b910d005db03ce644e4657ae1fc512a67e596 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 6 Mar 2020 21:29:45 +0100 Subject: mptcp: selftests: add rcvbuf set option allows to run the tests with fixed receive buffer by passing "-R " to mptcp_connect.sh. While at it, add a default 10 second poll timeout so the "-t" becomes optional -- this makes mptcp_connect simpler to use during manual testing. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 45 +++++++++++++++++----- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 24 +++++++++--- 2 files changed, 54 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 99579c0223c1..702bab2c12da 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -34,8 +34,8 @@ extern int optind; #define TCP_ULP 31 #endif +static int poll_timeout = 10 * 1000; static bool listen_mode; -static int poll_timeout; enum cfg_mode { CFG_MODE_POLL, @@ -50,11 +50,20 @@ static int cfg_sock_proto = IPPROTO_MPTCP; static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; +static int cfg_rcvbuf; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]" - "[ -l ] [ -t timeout ] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" + "[-l] connect_address\n"); + fprintf(stderr, "\t-6 use ipv6\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); + fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-u -- check mptcp ulp\n"); exit(1); } @@ -97,6 +106,17 @@ static void xgetaddrinfo(const char *node, const char *service, } } +static void set_rcvbuf(int fd, unsigned int size) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); + if (err) { + perror("set SO_RCVBUF"); + exit(1); + } +} + static void set_sndbuf(int fd, unsigned int size) { int err; @@ -704,6 +724,8 @@ int main_loop(void) check_getpeername_connect(fd); + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); @@ -745,7 +767,7 @@ int parse_mode(const char *mode) return 0; } -int parse_sndbuf(const char *size) +static int parse_int(const char *size) { unsigned long s; @@ -765,16 +787,14 @@ int parse_sndbuf(const char *size) die_usage(); } - cfg_sndbuf = s; - - return 0; + return (int)s; } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) { + while ((c = getopt(argc, argv, "6lp:s:hut:m:S:R:")) != -1) { switch (c) { case 'l': listen_mode = true; @@ -802,8 +822,11 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_mode = parse_mode(optarg); break; - case 'b': - cfg_sndbuf = parse_sndbuf(optarg); + case 'S': + cfg_sndbuf = parse_int(optarg); + break; + case 'R': + cfg_rcvbuf = parse_int(optarg); break; } } @@ -831,6 +854,8 @@ int main(int argc, char *argv[]) if (fd < 0) return 1; + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index d573a0feb98d..acf02e156d20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="b:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:" ret=0 sin="" sout="" @@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101)) tc_reorder="" testmode="" sndbuf=0 +rcvbuf=0 options_log=true if [ $tc_loss -eq 100 ];then @@ -39,7 +40,8 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" - echo -e "\t-b: set sndbuf value (default: use kernel default)" + echo -e "\t-S: set sndbuf value (default: use kernel default)" + echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" } @@ -73,11 +75,19 @@ while getopts "$optstring" option;do "c") capture=true ;; - "b") + "S") if [ $OPTARG -ge 0 ];then sndbuf="$OPTARG" else - echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2 + echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "R") + if [ $OPTARG -ge 0 ];then + rcvbuf="$OPTARG" + else + echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 exit 1 fi ;; @@ -342,8 +352,12 @@ do_transfer() port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) + if [ "$rcvbuf" -gt 0 ]; then + extra_args="$extra_args -R $rcvbuf" + fi + if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -b $sndbuf" + extra_args="$extra_args -S $sndbuf" fi if [ -n "$testmode" ]; then -- cgit v1.2.3 From e7950166e40271c025e0eec348cdf5c63ac734fa Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 10 Mar 2020 15:29:37 +0100 Subject: perf vendor events s390: Add new deflate counters for IBM z15 Add support for new deflate counters: - Counter 247: cycles CPU spent obtaining access to Deflate unit - Counter 252: cycles CPU is using Deflate unit - Counter 264: Increments by one for every DEFLATE CONVERSION CALL instruction executed. - Counter 265: Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2. Also adjust the some crypto counter description to latest documentation. Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20200310142937.32045-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/s390/cf_z15/crypto6.json | 8 +++--- .../perf/pmu-events/arch/s390/cf_z15/extended.json | 30 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index 5e36bc2468d0..c998e4f1d1d2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -4,27 +4,27 @@ "EventCode": "80", "EventName": "ECC_FUNCTION_COUNT", "BriefDescription": "ECC Function Count", - "PublicDescription": "Long ECC function Count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "81", "EventName": "ECC_CYCLES_COUNT", "BriefDescription": "ECC Cycles Count", - "PublicDescription": "Long ECC Function cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "82", "EventName": "ECC_BLOCKED_FUNCTION_COUNT", "BriefDescription": "Ecc Blocked Function Count", - "PublicDescription": "Long ECC blocked function count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU." }, { "Unit": "CPU-M-CF", "EventCode": "83", "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", - "PublicDescription": "Long ECC blocked cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 89e070727e1b..2df2e231e9ee 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -25,7 +25,7 @@ "EventCode": "131", "EventName": "DTLB2_HPAGE_WRITES", "BriefDescription": "DTLB2 One-Megabyte Page Writes", - "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page" }, { "Unit": "CPU-M-CF", @@ -356,6 +356,34 @@ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" }, + { + "Unit": "CPU-M-CF", + "EventCode": "247", + "EventName": "DFLT_ACCESS", + "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit", + "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "252", + "EventName": "DFLT_CYCLES", + "BriefDescription": "Cycles CPU is using Deflate unit", + "PublicDescription": "Cycles CPU is using Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "264", + "EventName": "DFLT_CC", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "265", + "EventName": "DFLT_CCERROR", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2" + }, { "Unit": "CPU-M-CF", "EventCode": "448", -- cgit v1.2.3 From 6ffe559a77d1c963a3567f7a39a5419bdcdc4f1c Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Tue, 10 Mar 2020 16:32:30 +0900 Subject: selftests/bpf: Add test for the packed enum member in struct/union Add a simple test to the existing selftest program in order to make sure that a packed enum member in struct unexceeds the struct_size. Signed-off-by: Yoshiki Komachi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1583825550-18606-3-git-send-email-komachi.yoshiki@gmail.com --- tools/testing/selftests/bpf/test_btf.c | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 93040ca83e60..8da77cda5f4a 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1062,6 +1062,48 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, +/* Test member unexceeds the size of struct + * + * enum E { + * E0, + * E1, + * }; + * + * struct A { + * char m; + * enum E __attribute__((packed)) n; + * }; + */ +{ + .descr = "size check test #5", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* char */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), + /* enum E { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + /* } */ + /* struct A { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0E\0E0\0E1\0A\0m\0n", + .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check5_map", + .key_size = sizeof(int), + .value_size = 2, + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + /* typedef const void * const_void_ptr; * struct A { * const_void_ptr m; -- cgit v1.2.3 From 03fe02b113888576dc90c3e918d8e1a76b1ceb63 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:20 -0800 Subject: perf jevents: Support metric constraint A new field "MetricConstraint" is introduced in JSON event list. Extend jevents to parse the field and save the value in metric_constraint. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 19 +++++++++++++------ tools/perf/pmu-events/jevents.h | 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 079c77b6a2fd..6d0f61ffee42 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); if (deprecated) fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); + if (metric_constraint) + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); fprintf(outfp, "},\n"); return 0; @@ -375,6 +377,7 @@ struct event_struct { char *metric_name; char *metric_group; char *deprecated; + char *metric_constraint; }; #define ADD_EVENT_FIELD(field) do { if (field) { \ @@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct event_struct *es; @@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, char **name, char **long_desc, char **pmu, char **filter, char **perpkg, char **unit, char **metric_expr, char **metric_name, char **metric_group, unsigned long long eventcode, - char **deprecated) + char **deprecated, char **metric_constraint) { /* try to find matching event from arch standard values */ struct event_struct *es; @@ -515,7 +518,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data) { int err; @@ -545,6 +548,7 @@ int json_events(const char *fn, char *metric_name = NULL; char *metric_group = NULL; char *deprecated = NULL; + char *metric_constraint = NULL; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -629,6 +633,8 @@ int json_events(const char *fn, addfield(map, &metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { addfield(map, &metric_group, "", "", val); + } else if (json_streq(map, field, "MetricConstraint")) { + addfield(map, &metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -670,13 +676,13 @@ int json_events(const char *fn, &long_desc, &pmu, &filter, &perpkg, &unit, &metric_expr, &metric_name, &metric_group, eventcode, - &deprecated); + &deprecated, &metric_constraint); if (err) goto free_strings; } err = func(data, name, real_event(name, event), desc, long_desc, pmu, unit, perpkg, metric_expr, metric_name, - metric_group, deprecated); + metric_group, deprecated, metric_constraint); free_strings: free(event); free(desc); @@ -691,6 +697,7 @@ free_strings: free(metric_expr); free(metric_name); free(metric_group); + free(metric_constraint); free(arch_std); if (err) diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 5cda49a42143..2afc8304529e 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -8,7 +8,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index caeb577d36c9..53e76d5d5b37 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -18,6 +18,7 @@ struct pmu_event { const char *metric_name; const char *metric_group; const char *deprecated; + const char *metric_constraint; }; /* -- cgit v1.2.3 From f742634ab47f59160a85ddc502418556b21953c2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:21 -0800 Subject: perf metricgroup: Factor out metricgroup__add_metric_weak_group() Factor out metricgroup__add_metric_weak_group() which add metrics into a weak group. The change can improve code readability. Because following patch will introduce a function which add standalone metrics. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 57 +++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 02aee946b6c1..1cd042cb262e 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -399,13 +399,42 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } +static void metricgroup__add_metric_weak_group(struct strbuf *events, + const char **ids, + int idnum) +{ + bool no_group = false; + int i; + + for (i = 0; i < idnum; i++) { + pr_debug("found event %s\n", ids[i]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[i], "duration_time")) { + if (i > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } + strbuf_addf(events, "%s%s", + i == 0 || no_group ? "{" : ",", + ids[i]); + no_group = false; + } + if (!no_group) + strbuf_addf(events, "}:W"); +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; - int ret = -EINVAL; - int i, j; + int i, ret = -EINVAL; if (!map) return 0; @@ -422,7 +451,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; - bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -431,27 +459,8 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, continue; if (events->len > 0) strbuf_addf(events, ","); - for (j = 0; j < idnum; j++) { - pr_debug("found event %s\n", ids[j]); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(ids[j], "duration_time")) { - if (j > 0) - strbuf_addf(events, "}:W,"); - strbuf_addf(events, "duration_time"); - no_group = true; - continue; - } - strbuf_addf(events, "%s%s", - j == 0 || no_group ? "{" : ",", - ids[j]); - no_group = false; - } - if (!no_group) - strbuf_addf(events, "}:W"); + + metricgroup__add_metric_weak_group(events, ids, idnum); eg = malloc(sizeof(struct egroup)); if (!eg) { -- cgit v1.2.3 From 2a14c1bf017f48a17c8c0ba26a22625363e77cc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:22 -0800 Subject: perf util: Factor out sysctl__nmi_watchdog_enabled() The NMI watchdog status is required for metric group constraint examination. Factor out sysctl__nmi_watchdog_enabled() to retrieve the NMI watchdog status. Users may count more than one metric group each time. If so, the NMI watchdog status may be retrieved several times. To reduce the overhead, cache the NMI watchdog status. Replace the NMI watchdog status checking in print_footer() by sysctl__nmi_watchdog_enabled(). Suggested-by: Andi Kleen Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-4-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 ++---- tools/perf/util/util.c | 18 ++++++++++++++++++ tools/perf/util/util.h | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d89cb0da90f8..76c6052b12e2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -16,6 +16,7 @@ #include #include "cgroup.h" #include +#include "util.h" #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" @@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config) { double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; - int n; if (!config->null_run) fprintf(output, "\n"); @@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config) } fprintf(output, "\n\n"); - if (config->print_free_counters_hint && - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && - n > 0) + if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled()) fprintf(output, "Some events weren't counted. Try disabling the NMI watchdog:\n" " echo 0 > /proc/sys/kernel/nmi_watchdog\n" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 969ae560dad9..d707c9624dd9 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -55,6 +55,24 @@ int sysctl__max_stack(void) return sysctl_perf_event_max_stack; } +bool sysctl__nmi_watchdog_enabled(void) +{ + static bool cached; + static bool nmi_watchdog; + int value; + + if (cached) + return nmi_watchdog; + + if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0) + return false; + + nmi_watchdog = (value > 0) ? true : false; + cached = true; + + return nmi_watchdog; +} + bool test_attr__enabled; bool perf_host = true; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9969b8b46f7c..f486fdd3a538 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -29,6 +29,8 @@ size_t hex_width(u64 v); int sysctl__max_stack(void); +bool sysctl__nmi_watchdog_enabled(void); + int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) -- cgit v1.2.3 From ab483d8bc8acb83f0103bc38ef8f2c27d98ffd1b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:23 -0800 Subject: perf metricgroup: Support metric constraint Some metric groups have metric constraints. A metric group can be scheduled as a group only when some constraints are applied. For example, Page_Walks_Utilization has a metric constraint, "NO_NMI_WATCHDOG". When NMI watchdog is disabled, the metric group can be scheduled as a group. Otherwise, splitting the metric group into standalone metrics. Add a new function, metricgroup__has_constraint(), to check whether all constraints are applied. If not, splitting the metric group into standalone metrics. Currently, only one constraint, "NO_NMI_WATCHDOG", is checked. Print a warning for the metric group with the constraint, when NMI WATCHDOG is enabled. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-5-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 54 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1cd042cb262e..c3a8c701609a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include "util.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -429,6 +431,49 @@ static void metricgroup__add_metric_weak_group(struct strbuf *events, strbuf_addf(events, "}:W"); } +static void metricgroup__add_metric_non_group(struct strbuf *events, + const char **ids, + int idnum) +{ + int i; + + for (i = 0; i < idnum; i++) + strbuf_addf(events, ",%s", ids[i]); +} + +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; + + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } + + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(struct pmu_event *pe) +{ + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { @@ -460,7 +505,10 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, if (events->len > 0) strbuf_addf(events, ","); - metricgroup__add_metric_weak_group(events, ids, idnum); + if (metricgroup__has_constraint(pe)) + metricgroup__add_metric_non_group(events, ids, idnum); + else + metricgroup__add_metric_weak_group(events, ids, idnum); eg = malloc(sizeof(struct egroup)); if (!eg) { @@ -502,6 +550,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, } } free(nlist); + + if (!ret) + metricgroup___watchdog_constraint_hint(NULL, true); + return ret; } -- cgit v1.2.3 From b95fcd2c1c25bd14f55d5d6ab268b3ab00b8a774 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:24 -0800 Subject: perf vendor events intel: Add NO_NMI_WATCHDOG metric constraint Add NO_NMI_WATCHDOG metric constraint to Page_Walks_Utilization for Sky Lake and Cascade Lake. Committer testing: On a Lenovo T480S, Intel(R) Core(TM) i7-8650U Kaby Lake, that looking at x86's mapfile.csv file is a: $ grep -w skylake tools/perf/pmu-events/arch/x86/mapfile.csv GenuineIntel-6-[4589]E,v24,skylake,core $ So uses the constraint added in this patch in this file: tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json Before: # perf stat -a -M Page_Walks_Utilization sleep 2 Performance counter stats for 'system wide': itlb_misses.walk_pending (0.00%) dtlb_load_misses.walk_pending (0.00%) dtlb_store_misses.walk_pending (0.00%) ept.walk_pending (0.00%) cycles (0.00%) 2.001750514 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog The events in group usually have to be from the same PMU. Try reorganizing the group. # After: # perf stat -a -M Page_Walks_Utilization sleep 2 Splitting metric group Page_Walks_Utilization into standalone metrics. Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog , Performance counter stats for 'system wide': 36,883,102 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization (79.99%) 123,104,146 dtlb_load_misses.walk_pending (80.02%) 13,720,795 dtlb_store_misses.walk_pending (79.99%) 0 ept.walk_pending (79.99%) 1,519,948,400 cycles (80.01%) 2.002170780 seconds time elapsed # Before and after, if we disable the nmi_watchdog we get: # echo 0 > /proc/sys/kernel/nmi_watchdog # perf stat -a -M Page_Walks_Utilization sleep 2 Performance counter stats for 'system wide': 33,721,658 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization 84,070,996 dtlb_load_misses.walk_pending 9,816,071 dtlb_store_misses.walk_pending 0 ept.walk_pending 704,920,899 cycles 2.002331670 seconds time elapsed # More information about the metric expressions: # perf stat -v -a -M Page_Walks_Utilization sleep 2 Using CPUID GenuineIntel-6-8E-A metric expr ( itlb_misses.walk_pending + dtlb_load_misses.walk_pending + dtlb_store_misses.walk_pending + ept.walk_pending ) / ( 2 * cycles ) for Page_Walks_Utilization found event itlb_misses.walk_pending found event dtlb_load_misses.walk_pending found event dtlb_store_misses.walk_pending found event ept.walk_pending found event cycles adding {itlb_misses.walk_pending,dtlb_load_misses.walk_pending,dtlb_store_misses.walk_pending,ept.walk_pending,cycles}:W -> cpu/umask=0x10,(null)=0x186a3,event=0x85/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x8/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x49/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x4f/ itlb_misses.walk_pending: 8085772 16010162799 16010162799 dtlb_load_misses.walk_pending: 28134579 16010162799 16010162799 dtlb_store_misses.walk_pending: 7276535 16010162799 16010162799 ept.walk_pending: 2 16010162799 16010162799 cycles: 315140605 16010162799 16010162799 Performance counter stats for 'system wide': 8,085,772 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization 28,134,579 dtlb_load_misses.walk_pending 7,276,535 dtlb_store_misses.walk_pending 2 ept.walk_pending 315,140,605 cycles 2.002333181 seconds time elapsed # Signed-off-by: Kan Liang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-6-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 3 ++- tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json | 3 ++- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index f94653229dd4..a728c6e5119b 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index e7feb60f9fa9..f97e8316ad2f 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 21d7a0c2c2e8..35f5db1786f7 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", -- cgit v1.2.3 From b8fdcfb5a17f1d4fd503caef5c457005a765ecd7 Mon Sep 17 00:00:00 2001 From: disconnect3d Date: Mon, 9 Mar 2020 11:48:53 +0100 Subject: perf map: Fix off by one in strncpy() size argument This patch fixes an off-by-one error in strncpy size argument in tools/perf/util/map.c. The issue is that in: strncmp(filename, "/system/lib/", 11) the passed string literal: "/system/lib/" has 12 bytes (without the NULL byte) and the passed size argument is 11. As a result, the logic won't match the ending "/" byte and will pass filepaths that are stored in other directories e.g. "/system/libmalicious/bin" or just "/system/libmalicious". This functionality seems to be present only on Android. I assume the /system/ directory is only writable by the root user, so I don't think this bug has much (or any) security impact. Fixes: eca818369996 ("perf tools: Add automatic remapping of Android libraries") Signed-off-by: disconnect3d Cc: Alexander Shishkin Cc: Changbin Du Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Michael Lentine Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200309104855.3775-1-dominik.b.czarnota@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 95428511300d..b342f744b1fc 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (!strncmp(filename, "/system/lib/", 12)) { char *ndk, *app; const char *arch; size_t ndk_length; -- cgit v1.2.3 From bdadd647cbf7b6e7f5d5891bd4e711292793cf23 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Mar 2020 16:53:41 -0300 Subject: perf map: Use strstarts() to look for Android libraries And add the '/' to avoid looking at things like "/system/libsomething", when all we want to know if it is like "/system/lib/something", i.e. if it is in that system library dir. Using strstarts() avoids off-by-one errors like recently fixed in this file. Since this adds the '/' I separated this patch, another patch will make this consistent by removing other strncmp(str, prefix, manually calculated prefix length) usage. Reported-by: Dominik Czarnota Acked-by: Dominik Czarnota Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: Link: http://lore.kernel.org/lkml/CABEVAa0_q-uC0vrrqpkqRHy_9RLOSXOJxizMLm1n5faHRy2AeA@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b342f744b1fc..53d96611e6a6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename) static inline int is_android_lib(const char *filename) { - return !strncmp(filename, "/data/app-lib", 13) || - !strncmp(filename, "/system/lib", 11); + return strstarts(filename, "/data/app-lib/") || + strstarts(filename, "/system/lib/"); } static inline bool replace_android_lib(const char *filename, char *newfilename) @@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) app_abi_length = strlen(app_abi); - if (!strncmp(filename, "/data/app-lib", 13)) { + if (strstarts(filename, "/data/app-lib/")) { char *apk_path; if (!app_abi_length) @@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 12)) { + if (strstarts(filename, "/system/lib/")) { char *ndk, *app; const char *arch; size_t ndk_length; -- cgit v1.2.3 From d01751563caf0dec7be36f81de77cc0197b77e59 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 Feb 2020 10:18:07 +0800 Subject: perf cs-etm: Swap packets for instruction samples If use option '--itrace=iNNN' with Arm CoreSight trace data, perf tool fails inject instruction samples; the root cause is the packets are only swapped for branch samples and last branches but not for instruction samples, so the new coming packets cannot be properly handled for only synthesizing instruction samples. To fix this issue, this patch refactors the code with a new function cs_etm__packet_swap() which is used to swap packets and adds the condition for instruction samples. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200219021811.20067-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b3b3fe3ea345..294b09cfb034 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -363,6 +363,23 @@ struct cs_etm_packet_queue return NULL; } +static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, + struct cs_etm_traceid_queue *tidq) +{ + struct cs_etm_packet *tmp; + + if (etm->sample_branches || etm->synth_opts.last_branch || + etm->sample_instructions) { + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; + } +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -1342,7 +1359,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) { struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; int ret; u8 trace_chan_id = tidq->trace_chan_id; u64 instrs_executed = tidq->packet->instr_count; @@ -1406,15 +1422,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); return 0; } @@ -1443,7 +1451,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, { int err = 0; struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; /* Handle start tracing packet */ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) @@ -1478,15 +1485,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } swap_packet: - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); return err; } -- cgit v1.2.3 From f1410028c762893daf353765112cf6797e4442fa Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 Feb 2020 10:18:08 +0800 Subject: perf cs-etm: Continuously record last branch Every time synthesize instruction sample, the last branch recording will be reset. This is fine if the instruction period is big enough, for example if use the option '--itrace=i100000', the last branch array is reset for every sample with 100000 instructions per period; before generate the next instruction sample, there has the sufficient packets coming to fill the last branch array. On the other hand, if set a very small period, the packets will be significantly reduced between two continuous instruction samples, thus the last branch array is almost empty for new instruction sample by frequently resetting. To allow the last branches to work properly for any instruction periods, this patch avoids to reset the last branch for every instruction sample and only reset it when flush the trace data. The last branches will be reset only for two cases, one is for trace starting, another case is for discontinuous trace; other cases can keep recording last branches for continuous instruction samples. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200219021811.20067-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 294b09cfb034..2c4156c5ed09 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1170,9 +1170,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, "CS ETM Trace: failed to deliver instruction event, error %d\n", ret); - if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(tidq); - return ret; } @@ -1487,6 +1484,10 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, swap_packet: cs_etm__packet_swap(etm, tidq); + /* Reset last branches after flush the trace */ + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(tidq); + return err; } -- cgit v1.2.3 From c9f5baa136777b2c982f6f7a90c9da69a88be148 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 Feb 2020 10:18:09 +0800 Subject: perf cs-etm: Correct synthesizing instruction samples When 'etm->instructions_sample_period' is less than 'tidq->period_instructions', the function cs_etm__sample() cannot handle this case properly with its logic. Let's see below flow as an example: - If we set itrace option '--itrace=i4', then function cs_etm__sample() has variables with initialized values: tidq->period_instructions = 0 etm->instructions_sample_period = 4 - When the first packet is coming: packet->instr_count = 10; the number of instructions executed in this packet is 10, thus update period_instructions as below: tidq->period_instructions = 0 + 10 = 10 instrs_over = 10 - 4 = 6 offset = 10 - 6 - 1 = 3 tidq->period_instructions = instrs_over = 6 - When the second packet is coming: packet->instr_count = 10; in the second pass, assume 10 instructions in the trace sample again: tidq->period_instructions = 6 + 10 = 16 instrs_over = 16 - 4 = 12 offset = 10 - 12 - 1 = -3 -> the negative value tidq->period_instructions = instrs_over = 12 So after handle these two packets, there have below issues: The first issue is that cs_etm__instr_addr() returns the address within the current trace sample of the instruction related to offset, so the offset is supposed to be always unsigned value. But in fact, function cs_etm__sample() might calculate a negative offset value (in handling the second packet, the offset is -3) and pass to cs_etm__instr_addr() with u64 type with a big positive integer. The second issue is it only synthesizes 2 samples for sample period = 4. In theory, every packet has 10 instructions so the two packets have total 20 instructions, 20 instructions should generate 5 samples (4 x 5 = 20). This is because cs_etm__sample() only calls once cs_etm__synth_instruction_sample() to generate instruction sample per range packet. This patch fixes the logic in function cs_etm__sample(); the basic idea for handling coming packet is: - To synthesize the first instruction sample, it combines the left instructions from the previous packet and the head of the new packet; then generate continuous samples with sample period; - At the tail of the new packet, if it has the rest instructions, these instructions will be left for the sequential sample. Suggested-by: Mike Leach Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200219021811.20067-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 87 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2c4156c5ed09..1ddcc67e13dd 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1358,9 +1358,12 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, struct cs_etm_auxtrace *etm = etmq->etm; int ret; u8 trace_chan_id = tidq->trace_chan_id; - u64 instrs_executed = tidq->packet->instr_count; + u64 instrs_prev; - tidq->period_instructions += instrs_executed; + /* Get instructions remainder from previous packet */ + instrs_prev = tidq->period_instructions; + + tidq->period_instructions += tidq->packet->instr_count; /* * Record a branch when the last instruction in @@ -1378,26 +1381,76 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, * TODO: allow period to be defined in cycles and clock time */ - /* Get number of instructions executed after the sample point */ - u64 instrs_over = tidq->period_instructions - - etm->instructions_sample_period; + /* + * Below diagram demonstrates the instruction samples + * generation flows: + * + * Instrs Instrs Instrs Instrs + * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) + * | | | | + * V V V V + * -------------------------------------------------- + * ^ ^ + * | | + * Period Period + * instructions(Pi) instructions(Pi') + * + * | | + * \---------------- -----------------/ + * V + * tidq->packet->instr_count + * + * Instrs Sample(n...) are the synthesised samples occurring + * every etm->instructions_sample_period instructions - as + * defined on the perf command line. Sample(n) is being the + * last sample before the current etm packet, n+1 to n+3 + * samples are generated from the current etm packet. + * + * tidq->packet->instr_count represents the number of + * instructions in the current etm packet. + * + * Period instructions (Pi) contains the the number of + * instructions executed after the sample point(n) from the + * previous etm packet. This will always be less than + * etm->instructions_sample_period. + * + * When generate new samples, it combines with two parts + * instructions, one is the tail of the old packet and another + * is the head of the new coming packet, to generate + * sample(n+1); sample(n+2) and sample(n+3) consume the + * instructions with sample period. After sample(n+3), the rest + * instructions will be used by later packet and it is assigned + * to tidq->period_instructions for next round calculation. + */ /* - * Calculate the address of the sampled instruction (-1 as - * sample is reported as though instruction has just been - * executed, but PC has not advanced to next instruction) + * Get the initial offset into the current packet instructions; + * entry conditions ensure that instrs_prev is less than + * etm->instructions_sample_period. */ - u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, - tidq->packet, offset); + u64 offset = etm->instructions_sample_period - instrs_prev; + u64 addr; - ret = cs_etm__synth_instruction_sample( - etmq, tidq, addr, etm->instructions_sample_period); - if (ret) - return ret; + while (tidq->period_instructions >= + etm->instructions_sample_period) { + /* + * Calculate the address of the sampled instruction (-1 + * as sample is reported as though instruction has just + * been executed, but PC has not advanced to next + * instruction) + */ + addr = cs_etm__instr_addr(etmq, trace_chan_id, + tidq->packet, offset - 1); + ret = cs_etm__synth_instruction_sample( + etmq, tidq, addr, + etm->instructions_sample_period); + if (ret) + return ret; - /* Carry remaining instructions into next sample period */ - tidq->period_instructions = instrs_over; + offset += etm->instructions_sample_period; + tidq->period_instructions -= + etm->instructions_sample_period; + } } if (etm->sample_branches) { -- cgit v1.2.3 From 695378b567df1fe6631c6684fcc9eeb4257df70f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 Feb 2020 10:18:10 +0800 Subject: perf cs-etm: Optimize copying last branches If an instruction range packet can generate multiple instruction samples, these samples share the same last branches; it's not necessary to copy the same last branches repeatedly for these samples within the same packet. This patch moves out the last branches copying from function cs_etm__synth_instruction_sample(), and execute it prior to generating instruction samples. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200219021811.20067-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1ddcc67e13dd..87d9943177bc 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1151,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); - if (etm->synth_opts.last_branch) { - cs_etm__copy_last_branch_rb(etmq, tidq); + if (etm->synth_opts.last_branch) sample.branch_stack = tidq->last_branch; - } if (etm->synth_opts.inject) { ret = cs_etm__inject_event(event, &sample, @@ -1431,6 +1429,10 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, u64 offset = etm->instructions_sample_period - instrs_prev; u64 addr; + /* Prepare last branches for instruction sample */ + if (etm->synth_opts.last_branch) + cs_etm__copy_last_branch_rb(etmq, tidq); + while (tidq->period_instructions >= etm->instructions_sample_period) { /* @@ -1508,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, if (etmq->etm->synth_opts.last_branch && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. @@ -1515,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, * Use the address of the end of the last reported execution * range */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, @@ -1560,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, */ if (etmq->etm->synth_opts.last_branch && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Use the address of the end of the last reported execution * range. */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, -- cgit v1.2.3 From bc010dd657ee0309276c88ab828b9ad156f75b31 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 Feb 2020 10:18:11 +0800 Subject: perf cs-etm: Fix unsigned variable comparison to zero The variable 'offset' in function cs_etm__sample() is u64 type, it's not appropriate to check it with 'while (offset > 0)'; this patch changes to 'while (offset)'. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight ml Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200219021811.20067-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 87d9943177bc..62d2f9b9ce1b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -962,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, if (packet->isa == CS_ETM_ISA_T32) { u64 addr = packet->start_addr; - while (offset > 0) { + while (offset) { addr += cs_etm__t32_instr_size(etmq, trace_chan_id, addr); offset--; -- cgit v1.2.3 From 0c2d041232411c8124136c9497c0e352dcf18baa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 10 Mar 2020 22:21:10 -0700 Subject: perf doc: Set man page date to last git commit Currently the man page dates reflect the date the man pages were built. This patch adjusts the date so that the date is when then man page last had a commit against it. The date is generated using 'git log'. Committer testing: $ git log -1 --pretty="format:%cd" --date=short tools/perf/Documentation/perf-top.txt 2020-01-14 Before: rm -rf /tmp/build/perf mkdir -p /tmp/build/perf make -C tools/perf O=/tmp/build/perf/ install $ date Wed 11 Mar 2020 10:21:19 AM -03 $ man perf-top | tail -1 perf 03/11/2020 PERF-TOP(1) $ After: rm -rf /tmp/build/perf mkdir -p /tmp/build/perf make -C tools/perf O=/tmp/build/perf/ install $ date $ date Wed 11 Mar 2020 10:24:06 AM -03 $ man perf-top | tail -1 perf 2020-01-14 PERF-TOP(1) $ Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Mark Rutland Cc: Masanari Iida Cc: Mukesh Ojha Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20200311052110.23132-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index adc5a7e44b98..31824d5269cc 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ + -aperf_date=$(shell git log -1 --pretty="format:%cd" \ + --date=short $<) \ + -o $@+ $< && \ mv $@+ $@ XSLT = docbook.xsl -- cgit v1.2.3 From 97256d1a2a62390077bc72009628af5be44fd8a9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Mar 2020 14:20:32 +0200 Subject: perf intel-pt: Rename intel-pt.txt and put it in man page format Make the Intel PT documentation into a man page. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200311122034.3697-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 991 --------------------------- tools/perf/Documentation/perf-intel-pt.txt | 1000 ++++++++++++++++++++++++++++ 2 files changed, 1000 insertions(+), 991 deletions(-) delete mode 100644 tools/perf/Documentation/intel-pt.txt create mode 100644 tools/perf/Documentation/perf-intel-pt.txt (limited to 'tools') diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt deleted file mode 100644 index 2cf2d9e9d0da..000000000000 --- a/tools/perf/Documentation/intel-pt.txt +++ /dev/null @@ -1,991 +0,0 @@ -Intel Processor Trace -===================== - -Overview -======== - -Intel Processor Trace (Intel PT) is an extension of Intel Architecture that -collects information about software execution such as control flow, execution -modes and timings and formats it into highly compressed binary packets. -Technical details are documented in the Intel 64 and IA-32 Architectures -Software Developer Manuals, Chapter 36 Intel Processor Trace. - -Intel PT is first supported in Intel Core M and 5th generation Intel Core -processors that are based on the Intel micro-architecture code name Broadwell. - -Trace data is collected by 'perf record' and stored within the perf.data file. -See below for options to 'perf record'. - -Trace data must be 'decoded' which involves walking the object code and matching -the trace data packets. For example a TNT packet only tells whether a -conditional branch was taken or not taken, so to make use of that packet the -decoder must know precisely which instruction was being executed. - -Decoding is done on-the-fly. The decoder outputs samples in the same format as -samples output by perf hardware events, for example as though the "instructions" -or "branches" events had been recorded. Presently 3 tools support this: -'perf script', 'perf report' and 'perf inject'. See below for more information -on using those tools. - -The main distinguishing feature of Intel PT is that the decoder can determine -the exact flow of software execution. Intel PT can be used to understand why -and how did software get to a certain point, or behave a certain way. The -software does not have to be recompiled, so Intel PT works with debug or release -builds, however the executed images are needed - which makes use in JIT-compiled -environments, or with self-modified code, a challenge. Also symbols need to be -provided to make sense of addresses. - -A limitation of Intel PT is that it produces huge amounts of trace data -(hundreds of megabytes per second per core) which takes a long time to decode, -for example two or three orders of magnitude longer than it took to collect. -Another limitation is the performance impact of tracing, something that will -vary depending on the use-case and architecture. - - -Quickstart -========== - -It is important to start small. That is because it is easy to capture vastly -more data than can possibly be processed. - -The simplest thing to do with Intel PT is userspace profiling of small programs. -Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: - - perf record -e intel_pt//u ls - -And profiled with 'perf report' e.g. - - perf report - -To also trace kernel space presents a problem, namely kernel self-modifying -code. A fairly good kernel image is available in /proc/kcore but to get an -accurate image a copy of /proc/kcore needs to be made under the same conditions -as the data capture. A script perf-with-kcore can do that, but beware that the -script makes use of 'sudo' to copy /proc/kcore. If you have perf installed -locally from the source tree you can do: - - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls - -which will create a directory named 'pt_ls' and put the perf.data file and -copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use -'perf report' becomes: - - ~/libexec/perf-core/perf-with-kcore report pt_ls - -Because samples are synthesized after-the-fact, the sampling period can be -selected for reporting. e.g. sample every microsecond - - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge - -See the sections below for more information about the --itrace option. - -Beware the smaller the period, the more samples that are produced, and the -longer it takes to process them. - -Also note that the coarseness of Intel PT timing information will start to -distort the statistical value of the sampling as the sampling period becomes -smaller. - -To represent software control flow, "branches" samples are produced. By default -a branch sample is synthesized for every single branch. To get an idea what -data is available you can use the 'perf script' tool with all itrace sampling -options, which will list all the samples. - - perf record -e intel_pt//u ls - perf script --itrace=ibxwpe - -An interesting field that is not printed by default is 'flags' which can be -displayed as follows: - - perf script --itrace=ibxwpe -F+flags - -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. - -Another interesting field that is not printed by default is 'ipc' which can be -displayed as follows: - - perf script --itrace=be -F+ipc - -There are two ways that instructions-per-cycle (IPC) can be calculated depending -on the recording. - -If the 'cyc' config term (see config terms section below) was used, then IPC is -calculated using the cycle count from CYC packets, otherwise MTC packets are -used - refer to the 'mtc' config term. When MTC is used, however, the values -are less accurate because the timing is less accurate. - -Because Intel PT does not update the cycle count on every branch or instruction, -the values will often be zero. When there are values, they will be the number -of instructions and number of cycles since the last update, and thus represent -the average IPC since the last IPC for that event type. Note IPC for "branches" -events is calculated separately from IPC for "instructions" events. - -Also note that the IPC instruction count may or may not include the current -instruction. If the cycle count is associated with an asynchronous branch -(e.g. page fault or interrupt), then the instruction count does not include the -current instruction, otherwise it does. That is consistent with whether or not -that instruction has retired when the cycle count is updated. - -Another note, in the case of "branches" events, non-taken branches are not -presently sampled, so IPC values for them do not appear e.g. a CYC packet with a -TNT packet that starts with a non-taken branch. To see every possible IPC -value, "instructions" events can be used e.g. --itrace=i0ns - -While it is possible to create scripts to analyze the data, an alternative -approach is available to export the data to a sqlite or postgresql database. -Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script exported-sql-viewer.py for an example of using the database. - -There is also script intel-pt-events.py which provides an example of how to -unpack the raw data for power events and PTWRITE. - -As mentioned above, it is easy to capture too much data. One way to limit the -data captured is to use 'snapshot' mode which is explained further below. -Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. - -Another problem that will be experienced is decoder errors. They can be caused -by inability to access the executed image, self-modified or JIT-ed code, or the -inability to match side-band information (such as context switches and mmaps) -which results in the decoder not knowing what code was executed. - -There is also the problem of perf not being able to copy the data fast enough, -resulting in data lost because the buffer was full. See 'Buffer handling' below -for more details. - - -perf record -=========== - -new event ---------- - -The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are -selected by providing the PMU name followed by the "config" separated by slashes. -An enhancement has been made to allow default "config" e.g. the option - - -e intel_pt// - -will use a default config value. Currently that is the same as - - -e intel_pt/tsc,noretcomp=0/ - -which is the same as - - -e intel_pt/tsc=1,noretcomp=0/ - -Note there are now new config terms - see section 'config terms' further below. - -The config terms are listed in /sys/devices/intel_pt/format. They are bit -fields within the config member of the struct perf_event_attr which is -passed to the kernel by the perf_event_open system call. They correspond to bit -fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: - - $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* - /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 - /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 - /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 - /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 - /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 - /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 - /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 - -Note that the default config must be overridden for each term i.e. - - -e intel_pt/noretcomp=0/ - -is the same as: - - -e intel_pt/tsc=1,noretcomp=0/ - -So, to disable TSC packets use: - - -e intel_pt/tsc=0/ - -It is also possible to specify the config value explicitly: - - -e intel_pt/config=0x400/ - -Note that, as with all events, the event is suffixed with event modifiers: - - u userspace - k kernel - h hypervisor - G guest - H host - p precise ip - -'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. -'p' is also not relevant to Intel PT. So only options 'u' and 'k' are -meaningful for Intel PT. - -perf_event_attr is displayed if the -vv option is used e.g. - - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - - -config terms ------------- - -The June 2015 version of Intel 64 and IA-32 Architectures Software Developer -Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. -Some of the features are reflect in new config terms. All the config terms are -described below. - -tsc Always supported. Produces TSC timestamp packets to provide - timing information. In some cases it is possible to decode - without timing information, for example a per-thread context - that does not overlap executable memory maps. - - The default config selects tsc (i.e. tsc=1). - -noretcomp Always supported. Disables "return compression" so a TIP packet - is produced when a function returns. Causes more packets to be - produced but might make decoding more reliable. - - The default config does not select noretcomp (i.e. noretcomp=0). - -psb_period Allows the frequency of PSB packets to be specified. - - The PSB packet is a synchronization packet that provides a - starting point for decoding or recovery from errors. - - Support for psb_period is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and "0" - otherwise. - - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The psb_period value is converted to the approximate number of - trace bytes between PSB packets as: - - 2 ^ (value + 11) - - e.g. value 3 means 16KiB bytes between PSBs - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/psb_period=15/u uname - Invalid psb_period for intel_pt. Valid values are: 0-5 - - If MTC packets are selected, the default config selects a value - of 3 (i.e. psb_period=3) or the nearest lower value that is - supported (0 is always supported). Otherwise the default is 0. - - If decoding is expected to be reliable and the buffer is large - then a large PSB period can be used. - - Because a TSC packet is produced with PSB, the PSB period can - also affect the granularity to timing information in the absence - of MTC or CYC. - -mtc Produces MTC timing packets. - - MTC packets provide finer grain timestamp information than TSC - packets. MTC packets record time using the hardware crystal - clock (CTC) which is related to TSC packets using a TMA packet. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc - - which contains "1" if the feature is supported and - "0" otherwise. - - The frequency of MTC packets can also be specified - see - mtc_period below. - -mtc_period Specifies how frequently MTC packets are produced - see mtc - above for how to determine if MTC packets are supported. - - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The mtc_period value is converted to the MTC frequency as: - - CTC-frequency / (2 ^ value) - - e.g. value 3 means one eighth of CTC-frequency - - Where CTC is the hardware crystal clock, the frequency of which - can be related to TSC via values provided in cpuid leaf 0x15. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/mtc_period=15/u uname - Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 - - The default value is 3 or the nearest lower value - that is supported (0 is always supported). - -cyc Produces CYC timing packets. - - CYC packets provide even finer grain timestamp information than - MTC and TSC packets. A CYC packet contains the number of CPU - cycles since the last CYC packet. Unlike MTC and TSC packets, - CYC packets are only sent when another packet is also sent. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and - "0" otherwise. - - The number of CYC packets produced can be reduced by specifying - a threshold - see cyc_thresh below. - -cyc_thresh Specifies how frequently CYC packets are produced - see cyc - above for how to determine if CYC packets are supported. - - Valid cyc_thresh values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The cyc_thresh value represents the minimum number of CPU cycles - that must have passed before a CYC packet can be sent. The - number of CPU cycles is: - - 2 ^ (value - 1) - - e.g. value 4 means 8 CPU cycles must pass before a CYC packet - can be sent. Note a CYC packet is still only sent when another - packet is sent, not at, e.g. every 8 CPU cycles. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname - Invalid cyc_thresh for intel_pt. Valid values are: 0-12 - - CYC packets are not requested by default. - -pt Specifies pass-through which enables the 'branch' config term. - - The default config selects 'pt' if it is available, so a user will - never need to specify this term. - -branch Enable branch tracing. Branch tracing is enabled by default so to - disable branch tracing use 'branch=0'. - - The default config selects 'branch' if it is available. - -ptw Enable PTWRITE packets which are produced when a ptwrite instruction - is executed. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/ptwrite - - which contains "1" if the feature is supported and - "0" otherwise. - -fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet - provides the address of the ptwrite instruction. In the absence of - fup_on_ptw, the decoder will use the address of the previous branch - if branch tracing is enabled, otherwise the address will be zero. - Note that fup_on_ptw will work even when branch tracing is disabled. - -pwr_evt Enable power events. The power events provide information about - changes to the CPU C-state. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/power_event_trace - - which contains "1" if the feature is supported and - "0" otherwise. - - -AUX area sampling option ------------------------- - -To select Intel PT "sampling" the AUX area sampling option can be used: - - --aux-sample - -Optionally it can be followed by the sample size in bytes e.g. - - --aux-sample=8192 - -In addition, the Intel PT event to sample must be defined e.g. - - -e intel_pt//u - -Samples on other events will be created containing Intel PT data e.g. the -following will create Intel PT samples on the branch-misses event, note the -events must be grouped using {}: - - perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' - -An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to -events. In this case, the grouping is implied e.g. - - perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u - -is the same as: - - perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' - -but allows for also using an address filter e.g.: - - perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls - -It is important to select a sample size that is big enough to contain at least -one PSB packet. If not a warning will be displayed: - - Intel PT sample size (%zu) may be too small for PSB period (%zu) - -The calculation used for that is: if sample_size <= psb_period + 256 display the -warning. When sampling is used, psb_period defaults to 0 (2KiB). - -The default sample size is 4KiB. - -The sample size is passed in aux_sample_size in struct perf_event_attr. The -sample size is limited by the maximum event size which is 64KiB. It is -difficult to know how big the event might be without the trace sample attached, -but the tool validates that the sample size is not greater than 60KiB. - - -new snapshot option -------------------- - -The difference between full trace and snapshot from the kernel's perspective is -that in full trace we don't overwrite trace data that the user hasn't collected -yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let -the trace run and overwrite older data in the buffer so that whenever something -interesting happens, we can stop it and grab a snapshot of what was going on -around that interesting moment. - -To select snapshot mode a new option has been added: - - -S - -Optionally it can be followed by the snapshot size e.g. - - -S0x100000 - -The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size -nor snapshot size is specified, then the default is 4MiB for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. -If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced as described in the 'new auxtrace mmap size option' section below. - -The snapshot size is displayed if the option -vv is used e.g. - - Intel PT snapshot size: %zu - - -new auxtrace mmap size option ---------------------------- - -Intel PT buffer size is specified by an addition to the -m option e.g. - - -m,16 - -selects a buffer size of 16 pages i.e. 64KiB. - -Note that the existing functionality of -m is unchanged. The auxtrace mmap size -is specified by the optional addition of a comma and the value. - -The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. -If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the -user is likely to get an error as they exceed their mlock limit (Max locked -memory as shown in /proc/self/limits). Note that perf does not count the first -512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu -against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus -their mlock limit (which defaults to 64KiB but is not multiplied by the number -of cpus). - -In full-trace mode, powers of two are allowed for buffer size, with a minimum -size of 2 pages. In snapshot mode or sampling mode, it is the same but the -minimum size is 1 page. - -The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. - - mmap length 528384 - auxtrace mmap length 4198400 - - -Intel PT modes of operation ---------------------------- - -Intel PT can be used in 2 modes: - full-trace mode - sample mode - snapshot mode - -Full-trace mode traces continuously e.g. - - perf record -e intel_pt//u uname - -Sample mode attaches a Intel PT sample to other events e.g. - - perf record --aux-sample -e intel_pt//u -e branch-misses:u - -Snapshot mode captures the available data when a signal is sent e.g. - - perf record -v -e intel_pt//u -S ./loopy 1000000000 & - [1] 11435 - kill -USR2 11435 - Recording AUX area tracing snapshot - -Note that the signal sent is SIGUSR2. -Note that "Recording AUX area tracing snapshot" is displayed because the -v -option is used. - -The 2 modes cannot be used together. - - -Buffer handling ---------------- - -There may be buffer limitations (i.e. single ToPa entry) which means that actual -buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to -provide other sizes, and in particular an arbitrarily large size, multiple -buffers are logically concatenated. However an interrupt must be used to switch -between buffers. That has two potential problems: - a) the interrupt may not be handled in time so that the current buffer - becomes full and some trace data is lost. - b) the interrupts may slow the system and affect the performance - results. - -If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event -which the tools report as an error. - -In full-trace mode, the driver waits for data to be copied out before allowing -the (logical) buffer to wrap-around. If data is not copied out quickly enough, -again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to -wait, the intel_pt event gets disabled. Because it is difficult to know when -that happens, perf tools always re-enable the intel_pt event after copying out -data. - - -Intel PT and build ids ----------------------- - -By default "perf record" post-processes the event stream to find all build ids -for executables for all addresses sampled. Deliberately, Intel PT is not -decoded for that purpose (it would take too long). Instead the build ids for -all executables encountered (due to mmap, comm or task events) are included -in the perf.data file. - -To see buildids included in the perf.data file use the command: - - perf buildid-list - -If the perf.data file contains Intel PT data, that is the same as: - - perf buildid-list --with-hits - - -Snapshot mode and event disabling ---------------------------------- - -In order to make a snapshot, the intel_pt event is disabled using an IOCTL, -namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the -collection of side-band information. In order to prevent that, a dummy -software event has been introduced that permits tracking events (like mmaps) to -continue to be recorded while intel_pt is disabled. That is important to ensure -there is complete side-band information to allow the decoding of subsequent -snapshots. - -A test has been created for that. To find the test: - - perf test list - ... - 23: Test using a dummy software event to keep tracking - -To run the test: - - perf test 23 - 23: Test using a dummy software event to keep tracking : Ok - - -perf record modes (nothing new here) ------------------------------------- - -perf record essentially operates in one of three modes: - per thread - per cpu - workload only - -"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a -workload). -"per cpu" is selected by -C or -a. -"workload only" mode is selected by not using the other options but providing a -command to run (i.e. the workload). - -In per-thread mode an exact list of threads is traced. There is no inheritance. -Each thread has its own event buffer. - -In per-cpu mode all processes (or processes from the selected cgroup i.e. -G -option, or processes selected with -p or -u) are traced. Each cpu has its own -buffer. Inheritance is allowed. - -In workload-only mode, the workload is traced but with per-cpu buffers. -Inheritance is allowed. Note that you can now trace a workload in per-thread -mode by using the --per-thread option. - - -Privileged vs non-privileged users ----------------------------------- - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users -have memory limits imposed upon them. That affects what buffer sizes they can -have as outlined above. - -The v4.2 kernel introduced support for a context switch metadata event, -PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes -are scheduled out and in, just not by whom, which is left for the -PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, -which in turn requires CAP_SYS_ADMIN. - -Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context -switches") commit, that introduces these metadata events for further info. - -When working with kernels < v4.2, the following considerations must be taken, -as the sched:sched_switch tracepoints will be used to receive such information: - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are -not permitted to use tracepoints which means there is insufficient side-band -information to decode Intel PT in per-cpu mode, and potentially workload-only -mode too if the workload creates new processes. - -Note also, that to use tracepoints, read-access to debugfs is required. So if -debugfs is not mounted or the user does not have read-access, it will again not -be possible to decode Intel PT in per-cpu mode. - - -sched_switch tracepoint ------------------------ - -The sched_switch tracepoint is used to provide side-band data for Intel PT -decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't -available. - -The sched_switch events are automatically added. e.g. the second event shown -below: - - $ perf record -vv -e intel_pt//u uname - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 2 - size 112 - config 0x108 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER - read_format ID - inherit 1 - sample_id_all 1 - exclude_guest 1 - ------------------------------------------------------------ - sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 1 - size 112 - config 0x9 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - mmap 1 - comm 1 - enable_on_exec 1 - task 1 - sample_id_all 1 - mmap2 1 - comm_exec 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - mmap size 528384B - AUX area mmap length 4194304 - perf event ring buffer mmapped per cpu - Synthesizing auxtrace information - Linux - [ perf record: Woken up 1 times to write data ] - [ perf record: Captured and wrote 0.042 MB perf.data ] - -Note, the sched_switch event is only added if the user is permitted to use it -and only in per-cpu mode. - -Note also, the sched_switch event is only added if TSC packets are requested. -That is because, in the absence of timing information, the sched_switch events -cannot be matched against the Intel PT trace. - - -perf script -=========== - -By default, perf script will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace. - - -New --itrace option -------------------- - -Having no option is the same as - - --itrace - -which, in turn, is the same as - - --itrace=cepwx - -The letters are: - - i synthesize "instructions" events - b synthesize "branches" events - x synthesize "transactions" events - w synthesize "ptwrite" events - p synthesize "power" events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - e synthesize tracing error events - d create a debug log - g synthesize a call chain (use with i or x) - l synthesize last branch entries (use with i or x) - s skip initial number of events - -"Instructions" events look like they were recorded by "perf record -e -instructions". - -"Branches" events look like they were recorded by "perf record -e branches". "c" -and "r" can be combined to get calls and returns. - -"Transactions" events correspond to the start or end of transactions. The -'flags' field can be used in perf script to determine whether the event is a -tranasaction start, commit or abort. - -Note that "instructions", "branches" and "transactions" events depend on code -flow packets which can be disabled by using the config term "branch=0". Refer -to the config terms section above. - -"ptwrite" events record the payload of the ptwrite instruction and whether -"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are -recorded only if the "ptw" config term was used. Refer to the config terms -section above. perf script "synth" field displays "ptwrite" information like -this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was -used. - -"Power" events correspond to power event packets and CBR (core-to-bus ratio) -packets. While CBR packets are always recorded when tracing is enabled, power -event packets are recorded only if the "pwr_evt" config term was used. Refer to -the config terms section above. The power events record information about -C-state changes, whereas CBR is indicative of CPU frequency. perf script -"event,synth" fields display information like this: - cbr: cbr: 22 freq: 2189 MHz (200%) - mwait: hints: 0x60 extensions: 0x1 - pwre: hw: 0 cstate: 2 sub-cstate: 0 - exstop: ip: 1 - pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 -Where: - "cbr" includes the frequency and the percentage of maximum non-turbo - "mwait" shows mwait hints and extensions - "pwre" shows C-state transitions (to a C-state deeper than C0) and - whether initiated by hardware - "exstop" indicates execution stopped and whether the IP was recorded - exactly, - "pwrx" indicates return to C0 -For more details refer to the Intel 64 and IA-32 Architectures Software -Developer Manuals. - -Error events show where the decoder lost the trace. Error events -are quite important. Users must know if what they are seeing is a complete -picture or not. - -The "d" option will cause the creation of a file "intel_pt.log" containing all -decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. - -In addition, the period of the "instructions" event can be specified. e.g. - - --itrace=i10us - -sets the period to 10us i.e. one instruction sample is synthesized for each 10 -microseconds of trace. Alternatives to "us" are "ms" (milliseconds), -"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). - -"ms", "us" and "ns" are converted to TSC ticks. - -The timing information included with Intel PT does not give the time of every -instruction. Consequently, for the purpose of sampling, the decoder estimates -the time since the last timing packet based on 1 tick per instruction. The time -on the sample is *not* adjusted and reflects the last known value of TSC. - -For Intel PT, the default period is 100us. - -Setting it to a zero period means "as often as possible". - -In the case of Intel PT that is the same as a period of 1 and a unit of -'instructions' (i.e. --itrace=i1i). - -Also the call chain size (default 16, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=ig32 - --itrace=xg32 - -Also the number of last branch entries (default 64, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=il10 - --itrace=xl10 - -Note that last branch entries are cleared for each sample, so there is no overlap -from one sample to the next. - -To disable trace decoding entirely, use the option --no-itrace. - -It is also possible to skip events generated (instructions, branches, transactions) -at the beginning. This is useful to ignore initialization code. - - --itrace=i0nss1000000 - -skips the first million instructions. - -dump option ------------ - -perf script has an option (-D) to "dump" the events i.e. display the binary -data. - -When -D is used, Intel PT packets are displayed. The packet decoder does not -pay attention to PSB packets, but just decodes the bytes - so the packets seen -by the actual decoder may not be identical in places where the data is corrupt. -One example of that would be when the buffer-switching interrupt has been too -slow, and the buffer has been filled completely. In that case, the last packet -in the buffer might be truncated and immediately followed by a PSB as the trace -continues in the next buffer. - -To disable the display of Intel PT packets, combine the -D option with ---no-itrace. - - -perf report -=========== - -By default, perf report will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace exactly the same as -perf script, with the exception that the default is --itrace=igxe. - - -perf inject -=========== - -perf inject also accepts the --itrace option in which case tracing data is -removed and replaced with the synthesized events. e.g. - - perf inject --itrace -i perf.data -o perf.data.new - -Below is an example of using Intel PT with autofdo. It requires autofdo -(https://github.com/google/autofdo) and gcc version 5. The bubble -sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) -amended to take the number of elements as a parameter. - - $ gcc-5 -O3 sort.c -o sort_optimized - $ ./sort_optimized 30000 - Bubble sorting array of 30000 elements - 2254 ms - - $ cat ~/.perfconfig - [intel-pt] - mispred-all = on - - $ perf record -e intel_pt//u ./sort 3000 - Bubble sorting array of 3000 elements - 58 ms - [ perf record: Woken up 2 times to write data ] - [ perf record: Captured and wrote 3.939 MB perf.data ] - $ perf inject -i perf.data -o inj --itrace=i100usle --strip - $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 - $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo - $ ./sort_autofdo 30000 - Bubble sorting array of 30000 elements - 2155 ms - -Note there is currently no advantage to using Intel PT instead of LBR, but -that may change in the future if greater use is made of the data. - - -PEBS via Intel PT -================= - -Some hardware has the feature to redirect PEBS records to the Intel PT trace. -Recording is selected by using the aux-output config term e.g. - - perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname - -Note that currently, software only supports redirecting at most one PEBS event. - -To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. - - perf script --itrace=oe diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt new file mode 100644 index 000000000000..7d743a98a9c1 --- /dev/null +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -0,0 +1,1000 @@ +perf-intel-pt(1) +================ + +NAME +---- +perf-intel-pt - Support for Intel Processor Trace within perf tools + +SYNOPSIS +-------- +[verse] +'perf record' -e intel_pt// + +DESCRIPTION +----------- + +Intel Processor Trace (Intel PT) is an extension of Intel Architecture that +collects information about software execution such as control flow, execution +modes and timings and formats it into highly compressed binary packets. +Technical details are documented in the Intel 64 and IA-32 Architectures +Software Developer Manuals, Chapter 36 Intel Processor Trace. + +Intel PT is first supported in Intel Core M and 5th generation Intel Core +processors that are based on the Intel micro-architecture code name Broadwell. + +Trace data is collected by 'perf record' and stored within the perf.data file. +See below for options to 'perf record'. + +Trace data must be 'decoded' which involves walking the object code and matching +the trace data packets. For example a TNT packet only tells whether a +conditional branch was taken or not taken, so to make use of that packet the +decoder must know precisely which instruction was being executed. + +Decoding is done on-the-fly. The decoder outputs samples in the same format as +samples output by perf hardware events, for example as though the "instructions" +or "branches" events had been recorded. Presently 3 tools support this: +'perf script', 'perf report' and 'perf inject'. See below for more information +on using those tools. + +The main distinguishing feature of Intel PT is that the decoder can determine +the exact flow of software execution. Intel PT can be used to understand why +and how did software get to a certain point, or behave a certain way. The +software does not have to be recompiled, so Intel PT works with debug or release +builds, however the executed images are needed - which makes use in JIT-compiled +environments, or with self-modified code, a challenge. Also symbols need to be +provided to make sense of addresses. + +A limitation of Intel PT is that it produces huge amounts of trace data +(hundreds of megabytes per second per core) which takes a long time to decode, +for example two or three orders of magnitude longer than it took to collect. +Another limitation is the performance impact of tracing, something that will +vary depending on the use-case and architecture. + + +Quickstart +---------- + +It is important to start small. That is because it is easy to capture vastly +more data than can possibly be processed. + +The simplest thing to do with Intel PT is userspace profiling of small programs. +Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: + + perf record -e intel_pt//u ls + +And profiled with 'perf report' e.g. + + perf report + +To also trace kernel space presents a problem, namely kernel self-modifying +code. A fairly good kernel image is available in /proc/kcore but to get an +accurate image a copy of /proc/kcore needs to be made under the same conditions +as the data capture. A script perf-with-kcore can do that, but beware that the +script makes use of 'sudo' to copy /proc/kcore. If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with all itrace sampling +options, which will list all the samples. + + perf record -e intel_pt//u ls + perf script --itrace=ibxwpe + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script --itrace=ibxwpe -F+flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +Another interesting field that is not printed by default is 'ipc' which can be +displayed as follows: + + perf script --itrace=be -F+ipc + +There are two ways that instructions-per-cycle (IPC) can be calculated depending +on the recording. + +If the 'cyc' config term (see config terms section below) was used, then IPC is +calculated using the cycle count from CYC packets, otherwise MTC packets are +used - refer to the 'mtc' config term. When MTC is used, however, the values +are less accurate because the timing is less accurate. + +Because Intel PT does not update the cycle count on every branch or instruction, +the values will often be zero. When there are values, they will be the number +of instructions and number of cycles since the last update, and thus represent +the average IPC since the last IPC for that event type. Note IPC for "branches" +events is calculated separately from IPC for "instructions" events. + +Also note that the IPC instruction count may or may not include the current +instruction. If the cycle count is associated with an asynchronous branch +(e.g. page fault or interrupt), then the instruction count does not include the +current instruction, otherwise it does. That is consistent with whether or not +that instruction has retired when the cycle count is updated. + +Another note, in the case of "branches" events, non-taken branches are not +presently sampled, so IPC values for them do not appear e.g. a CYC packet with a +TNT packet that starts with a non-taken branch. To see every possible IPC +value, "instructions" events can be used e.g. --itrace=i0ns + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a sqlite or postgresql database. +Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, +and to script exported-sql-viewer.py for an example of using the database. + +There is also script intel-pt-events.py which provides an example of how to +unpack the raw data for power events and PTWRITE. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. + +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +----------- + +new event +~~~~~~~~~ + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +Note there are now new config terms - see section 'config terms' further below. + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 + +Note that the default config must be overridden for each term i.e. + + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. + + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + + +config terms +~~~~~~~~~~~~ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflect in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors. + + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB bytes between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity to timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. + + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +pt Specifies pass-through which enables the 'branch' config term. + + The default config selects 'pt' if it is available, so a user will + never need to specify this term. + +branch Enable branch tracing. Branch tracing is enabled by default so to + disable branch tracing use 'branch=0'. + + The default config selects 'branch' if it is available. + +ptw Enable PTWRITE packets which are produced when a ptwrite instruction + is executed. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/ptwrite + + which contains "1" if the feature is supported and + "0" otherwise. + +fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet + provides the address of the ptwrite instruction. In the absence of + fup_on_ptw, the decoder will use the address of the previous branch + if branch tracing is enabled, otherwise the address will be zero. + Note that fup_on_ptw will work even when branch tracing is disabled. + +pwr_evt Enable power events. The power events provide information about + changes to the CPU C-state. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace + + which contains "1" if the feature is supported and + "0" otherwise. + + +AUX area sampling option +~~~~~~~~~~~~~~~~~~~~~~~~ + +To select Intel PT "sampling" the AUX area sampling option can be used: + + --aux-sample + +Optionally it can be followed by the sample size in bytes e.g. + + --aux-sample=8192 + +In addition, the Intel PT event to sample must be defined e.g. + + -e intel_pt//u + +Samples on other events will be created containing Intel PT data e.g. the +following will create Intel PT samples on the branch-misses event, note the +events must be grouped using {}: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' + +An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to +events. In this case, the grouping is implied e.g. + + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u + +is the same as: + + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' + +but allows for also using an address filter e.g.: + + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls + +It is important to select a sample size that is big enough to contain at least +one PSB packet. If not a warning will be displayed: + + Intel PT sample size (%zu) may be too small for PSB period (%zu) + +The calculation used for that is: if sample_size <= psb_period + 256 display the +warning. When sampling is used, psb_period defaults to 0 (2KiB). + +The default sample size is 4KiB. + +The sample size is passed in aux_sample_size in struct perf_event_attr. The +sample size is limited by the maximum event size which is 64KiB. It is +difficult to know how big the event might be without the trace sample attached, +but the tool validates that the sample size is not greater than 60KiB. + + +new snapshot option +~~~~~~~~~~~~~~~~~~~ + +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the +user is likely to get an error as they exceed their mlock limit (Max locked +memory as shown in /proc/self/limits). Note that perf does not count the first +512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu +against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus +their mlock limit (which defaults to 64KiB but is not multiplied by the number +of cpus). + +In full-trace mode, powers of two are allowed for buffer size, with a minimum +size of 2 pages. In snapshot mode or sampling mode, it is the same but the +minimum size is 1 page. + +The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. + + mmap length 528384 + auxtrace mmap length 4198400 + + +Intel PT modes of operation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Intel PT can be used in 2 modes: + full-trace mode + sample mode + snapshot mode + +Full-trace mode traces continuously e.g. + + perf record -e intel_pt//u uname + +Sample mode attaches a Intel PT sample to other events e.g. + + perf record --aux-sample -e intel_pt//u -e branch-misses:u + +Snapshot mode captures the available data when a signal is sent e.g. + + perf record -v -e intel_pt//u -S ./loopy 1000000000 & + [1] 11435 + kill -USR2 11435 + Recording AUX area tracing snapshot + +Note that the signal sent is SIGUSR2. +Note that "Recording AUX area tracing snapshot" is displayed because the -v +option is used. + +The 2 modes cannot be used together. + + +Buffer handling +~~~~~~~~~~~~~~~ + +There may be buffer limitations (i.e. single ToPa entry) which means that actual +buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to +provide other sizes, and in particular an arbitrarily large size, multiple +buffers are logically concatenated. However an interrupt must be used to switch +between buffers. That has two potential problems: + a) the interrupt may not be handled in time so that the current buffer + becomes full and some trace data is lost. + b) the interrupts may slow the system and affect the performance + results. + +If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event +which the tools report as an error. + +In full-trace mode, the driver waits for data to be copied out before allowing +the (logical) buffer to wrap-around. If data is not copied out quickly enough, +again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to +wait, the intel_pt event gets disabled. Because it is difficult to know when +that happens, perf tools always re-enable the intel_pt event after copying out +data. + + +Intel PT and build ids +~~~~~~~~~~~~~~~~~~~~~~ + +By default "perf record" post-processes the event stream to find all build ids +for executables for all addresses sampled. Deliberately, Intel PT is not +decoded for that purpose (it would take too long). Instead the build ids for +all executables encountered (due to mmap, comm or task events) are included +in the perf.data file. + +To see buildids included in the perf.data file use the command: + + perf buildid-list + +If the perf.data file contains Intel PT data, that is the same as: + + perf buildid-list --with-hits + + +Snapshot mode and event disabling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to make a snapshot, the intel_pt event is disabled using an IOCTL, +namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +The v4.2 kernel introduced support for a context switch metadata event, +PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes +are scheduled out and in, just not by whom, which is left for the +PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, +which in turn requires CAP_SYS_ADMIN. + +Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context +switches") commit, that introduces these metadata events for further info. + +When working with kernels < v4.2, the following considerations must be taken, +as the sched:sched_switch tracepoints will be used to receive such information: + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +~~~~~~~~~~~~~~~~~~~~~~~ + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't +available. + +The sched_switch events are automatically added. e.g. the second event shown +below: + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +----------- + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +~~~~~~~~~~~~~~~~~~~ + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=cepwx + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + w synthesize "ptwrite" events + p synthesize "power" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) + s skip initial number of events + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. The +'flags' field can be used in perf script to determine whether the event is a +tranasaction start, commit or abort. + +Note that "instructions", "branches" and "transactions" events depend on code +flow packets which can be disabled by using the config term "branch=0". Refer +to the config terms section above. + +"ptwrite" events record the payload of the ptwrite instruction and whether +"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are +recorded only if the "ptw" config term was used. Refer to the config terms +section above. perf script "synth" field displays "ptwrite" information like +this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was +used. + +"Power" events correspond to power event packets and CBR (core-to-bus ratio) +packets. While CBR packets are always recorded when tracing is enabled, power +event packets are recorded only if the "pwr_evt" config term was used. Refer to +the config terms section above. The power events record information about +C-state changes, whereas CBR is indicative of CPU frequency. perf script +"event,synth" fields display information like this: + cbr: cbr: 22 freq: 2189 MHz (200%) + mwait: hints: 0x60 extensions: 0x1 + pwre: hw: 0 cstate: 2 sub-cstate: 0 + exstop: ip: 1 + pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 +Where: + "cbr" includes the frequency and the percentage of maximum non-turbo + "mwait" shows mwait hints and extensions + "pwre" shows C-state transitions (to a C-state deeper than C0) and + whether initiated by hardware + "exstop" indicates execution stopped and whether the IP was recorded + exactly, + "pwrx" indicates return to C0 +For more details refer to the Intel 64 and IA-32 Architectures Software +Developer Manuals. + +Error events show where the decoder lost the trace. Error events +are quite important. Users must know if what they are seeing is a complete +picture or not. + +The "d" option will cause the creation of a file "intel_pt.log" containing all +decoded packets and instructions. Note that this option slows down the decoder +and that the resulting file may be very large. + +In addition, the period of the "instructions" event can be specified. e.g. + + --itrace=i10us + +sets the period to 10us i.e. one instruction sample is synthesized for each 10 +microseconds of trace. Alternatives to "us" are "ms" (milliseconds), +"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). + +"ms", "us" and "ns" are converted to TSC ticks. + +The timing information included with Intel PT does not give the time of every +instruction. Consequently, for the purpose of sampling, the decoder estimates +the time since the last timing packet based on 1 tick per instruction. The time +on the sample is *not* adjusted and reflects the last known value of TSC. + +For Intel PT, the default period is 100us. + +Setting it to a zero period means "as often as possible". + +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + +Also the call chain size (default 16, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=ig32 + --itrace=xg32 + +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + +To disable trace decoding entirely, use the option --no-itrace. + +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. + +dump option +~~~~~~~~~~~ + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +----------- + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +----------- + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all = on + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. + + +PEBS via Intel PT +----------------- + +Some hardware has the feature to redirect PEBS records to the Intel PT trace. +Recording is selected by using the aux-output config term e.g. + + perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname + +Note that currently, software only supports redirecting at most one PEBS event. + +To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. + + perf script --itrace=oe -- cgit v1.2.3 From 870d325b15fb31af031cec58b89c1fb099a94bc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Mar 2020 14:20:33 +0200 Subject: perf intel-pt: Add Intel PT man page references Add references to Intel PT man page in man pages of associated tools. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200311122034.3697-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 3 ++- tools/perf/Documentation/perf-intel-pt.txt | 7 +++++++ tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/Documentation/perf-report.txt | 3 ++- tools/perf/Documentation/perf-script.txt | 2 +- 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index a64d6588470e..70969ea73e01 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -66,4 +66,5 @@ include::itrace.txt[] SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 7d743a98a9c1..456fdcbf26ac 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -998,3 +998,10 @@ Note that currently, software only supports redirecting at most one PEBS event. To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. perf script --itrace=oe + + +SEE ALSO +-------- + +linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1], +linkperf:perf-inject[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b23a4012a606..7f4db7592467 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -589,4 +589,4 @@ appended unit character - B/K/M/G SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-list[1] +linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index db61f16ffa56..bd0a029d4c08 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -546,4 +546,5 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2599b057e47b..db6a36aac47e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -429,4 +429,4 @@ include::itrace.txt[] SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], -linkperf:perf-script-python[1] +linkperf:perf-script-python[1], linkperf:perf-intel-pt[1] -- cgit v1.2.3 From ec2eab9deb8009fd8c69d61d04c66b77d438f17d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Mar 2020 14:20:34 +0200 Subject: perf intel-pt: Update intel-pt.txt file with new location of the documentation Make it easy for people looking in intel-pt.txt to find the new file. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200311122034.3697-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/perf/Documentation/intel-pt.txt (limited to 'tools') diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt new file mode 100644 index 000000000000..fd9241a1b987 --- /dev/null +++ b/tools/perf/Documentation/intel-pt.txt @@ -0,0 +1 @@ +Documentation for support for Intel Processor Trace within perf tools' has moved to file perf-intel-pt.txt -- cgit v1.2.3 From 37ccc12bbcef880e6b3fdf7e03cf9e5fe9df99f2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 10 Mar 2020 21:30:10 -0700 Subject: tools/runqslower: Add BPF_F_CURRENT_CPU for running selftest on older kernels Libbpf compiles and runs subset of selftests on each PR in its Github mirror repository. To allow still building up-to-date selftests against outdated kernel images, add back BPF_F_CURRENT_CPU definitions back. N.B. BCC's runqslower version ([0]) doesn't need BPF_F_CURRENT_CPU due to use of locally checked in vmlinux.h, generated against kernel with 1aae4bdd7879 ("bpf: Switch BPF UAPI #define constants used from BPF program side to enums") applied. [0] https://github.com/iovisor/bcc/pull/2809 Fixes: 367d82f17eff (" tools/runqslower: Drop copy/pasted BPF_F_CURRENT_CPU definiton") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200311043010.530620-1-andriin@fb.com --- tools/bpf/runqslower/runqslower.bpf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 0ba501305bad..1f18a409f044 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -5,6 +5,7 @@ #include "runqslower.h" #define TASK_RUNNING 0 +#define BPF_F_CURRENT_CPU 0xffffffffULL const volatile __u64 min_us = 0; const volatile pid_t targ_pid = 0; -- cgit v1.2.3 From 67439d555f7d46349deef56886129da96bd15744 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Wed, 11 Mar 2020 14:28:36 +0100 Subject: perf scripting perl: Add common_callchain to fix argument order Since common_callchain has been added to the argument array, we need to reflect it in perl-based scripts, because otherwise the following args would be shifted and thus incorrect. E.g. rw-by-pid and calculation of read and written bytes: Before: read counts by pid: pid comm # reads bytes_requested bytes_read ------ -------------------- ----------- ---------- ---------- 19301 dd 4 424510450039736 0 After: read counts by pid: pid comm # reads bytes_requested bytes_read ------ -------------------- ----------- ---------- ---------- 19301 dd 4 9536 4341 Committer testing: To see before after first do: # perf script record rw-by-pid ^C Now you'll have a perf.data file to report on, then do before and after using: # perf script report rw-by-pid Anbd notice the bytes_request/bytes_read, as above. Signed-off-by: Michael Petlan Tested-by: Arnaldo Carvalho de Melo Cc: Benjamin Salon Cc: Jiri Olsa LPU-Reference: 20200311132836.12693-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/perl/check-perf-trace.pl | 6 +++--- tools/perf/scripts/perl/failed-syscalls.pl | 2 +- tools/perf/scripts/perl/rw-by-file.pl | 6 +++--- tools/perf/scripts/perl/rw-by-pid.pl | 10 +++++----- tools/perf/scripts/perl/rwtop.pl | 10 +++++----- tools/perf/scripts/perl/wakeup-latency.pl | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl index 4e7076c20616..d307ce8fd6ed 100644 --- a/tools/perf/scripts/perl/check-perf-trace.pl +++ b/tools/perf/scripts/perl/check-perf-trace.pl @@ -28,7 +28,7 @@ sub trace_end sub irq::softirq_entry { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $vec) = @_; print_header($event_name, $common_cpu, $common_secs, $common_nsecs, @@ -43,7 +43,7 @@ sub irq::softirq_entry sub kmem::kmalloc { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $call_site, $ptr, $bytes_req, $bytes_alloc, $gfp_flags) = @_; @@ -92,7 +92,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl index 55e7ae4c5c88..05954a8f363a 100644 --- a/tools/perf/scripts/perl/failed-syscalls.pl +++ b/tools/perf/scripts/perl/failed-syscalls.pl @@ -18,7 +18,7 @@ my %failed_syscalls; sub raw_syscalls::sys_exit { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $id, $ret) = @_; if ($ret < 0) { diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl index 168fa5e94b44..92a750b8552b 100644 --- a/tools/perf/scripts/perl/rw-by-file.pl +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -28,7 +28,7 @@ my %writes; sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $reads{$fd}{bytes_requested} += $count; @@ -39,7 +39,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $writes{$fd}{bytes_written} += $count; @@ -98,7 +98,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl index 495698250b2f..d789fe39caab 100644 --- a/tools/perf/scripts/perl/rw-by-pid.pl +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -24,7 +24,7 @@ my %writes; sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret > 0) { @@ -40,7 +40,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $reads{$common_pid}{bytes_requested} += $count; @@ -51,7 +51,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_exit_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret <= 0) { @@ -62,7 +62,7 @@ sub syscalls::sys_exit_write sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $writes{$common_pid}{bytes_written} += $count; @@ -178,7 +178,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 6473442568a2..eba4df67af6b 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -35,7 +35,7 @@ if (!$interval) { sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; print_check(); @@ -53,7 +53,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; print_check(); @@ -66,7 +66,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_exit_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; print_check(); @@ -79,7 +79,7 @@ sub syscalls::sys_exit_write sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; print_check(); @@ -197,7 +197,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl index efcfec5e347a..53444ff4ec7f 100644 --- a/tools/perf/scripts/perl/wakeup-latency.pl +++ b/tools/perf/scripts/perl/wakeup-latency.pl @@ -28,7 +28,7 @@ my $total_wakeups = 0; sub sched::sched_switch { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid, $next_prio) = @_; @@ -51,7 +51,7 @@ sub sched::sched_switch sub sched::sched_wakeup { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $comm, $pid, $prio, $success, $target_cpu) = @_; $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs); @@ -101,7 +101,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } -- cgit v1.2.3 From 44d462acc0bf3eabe1522471fd1f683d8ce612cb Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Thu, 12 Mar 2020 15:21:45 +0300 Subject: perf record: Fix binding of AIO user space buffers to nodes Correct maxnode parameter value passed to mbind() syscall to be the amount of node mask bits to analyze plus 1. Dynamically allocate node mask memory depending on the index of node of cpu being profiled. Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes") Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/c7ea8ffe-1357-bf9e-3a89-1da1d8e9b75b@linux.intel.com [ Remove leftover nr_bits + 1 comment in mbind() call ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 3b664fa673a6..ab7108d22428 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; - unsigned long node_mask; + unsigned long *node_mask; + unsigned long node_index; + int err = 0; if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; mmap_len = mmap__mmap_len(map); - node_mask = 1UL << cpu__get_node(cpu); - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", - data, data + mmap_len, cpu__get_node(cpu)); + node_index = cpu__get_node(cpu); + node_mask = bitmap_alloc(node_index + 1); + if (!node_mask) { + pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; } + set_bit(node_index, node_mask); + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { + pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", + data, data + mmap_len, node_index); + err = -1; + } + bitmap_free(node_mask); } - return 0; + return err; } #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) -- cgit v1.2.3 From fe4eb069edb7ab845160350d9e67d572c026a4a7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 12 Mar 2020 14:03:30 +0100 Subject: bpftool: Use linux/types.h from source tree for profiler build When compiling bpftool on a system where the /usr/include/asm symlink doesn't exist (e.g. on an Ubuntu system without gcc-multilib installed), the build fails with: CLANG skeleton/profiler.bpf.o In file included from skeleton/profiler.bpf.c:4: In file included from /usr/include/linux/bpf.h:11: /usr/include/linux/types.h:5:10: fatal error: 'asm/types.h' file not found #include ^~~~~~~~~~~~~ 1 error generated. make: *** [Makefile:123: skeleton/profiler.bpf.o] Error 1 This indicates that the build is using linux/types.h from system headers instead of source tree headers. To fix this, adjust the clang search path to include the necessary headers from tools/testing/selftests/bpf/include/uapi and tools/include/uapi. Also use __bitwise__ instead of __bitwise in skeleton/profiler.h to avoid clashing with the definition in tools/testing/selftests/bpf/include/uapi/linux/types.h. Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200312130330.32239-1-tklauser@distanz.ch --- tools/bpf/bpftool/Makefile | 5 ++++- tools/bpf/bpftool/skeleton/profiler.h | 17 ++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 20a90d8450f8..f294f6c1e795 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -120,7 +120,10 @@ $(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) skeleton/profiler.bpf.o: skeleton/profiler.bpf.c - $(QUIET_CLANG)$(CLANG) -I$(srctree)/tools/lib -g -O2 -target bpf -c $< -o $@ + $(QUIET_CLANG)$(CLANG) \ + -I$(srctree)/tools/include/uapi/ \ + -I$(srctree)/tools/testing/selftests/bpf/include/uapi \ + -I$(srctree)/tools/lib -g -O2 -target bpf -c $< -o $@ profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h index e03b53eae767..1f767e9510f7 100644 --- a/tools/bpf/bpftool/skeleton/profiler.h +++ b/tools/bpf/bpftool/skeleton/profiler.h @@ -32,16 +32,15 @@ enum { #else #define __bitwise__ #endif -#define __bitwise __bitwise__ -typedef __u16 __bitwise __le16; -typedef __u16 __bitwise __be16; -typedef __u32 __bitwise __le32; -typedef __u32 __bitwise __be32; -typedef __u64 __bitwise __le64; -typedef __u64 __bitwise __be64; +typedef __u16 __bitwise__ __le16; +typedef __u16 __bitwise__ __be16; +typedef __u32 __bitwise__ __le32; +typedef __u32 __bitwise__ __be32; +typedef __u64 __bitwise__ __le64; +typedef __u64 __bitwise__ __be64; -typedef __u16 __bitwise __sum16; -typedef __u32 __bitwise __wsum; +typedef __u16 __bitwise__ __sum16; +typedef __u32 __bitwise__ __wsum; #endif /* __PROFILER_H */ -- cgit v1.2.3 From 7f204a7de8b08542aca3c1daa96ed20e1177ba87 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Mar 2020 17:05:27 +0900 Subject: selftests: net: Add SO_REUSEADDR test to check if 4-tuples are fully utilized. This commit adds a test to check if we can fully utilize 4-tuples for connect() when all ephemeral ports are exhausted. The test program changes the local port range to use only one port and binds two sockets with or without SO_REUSEADDR and SO_REUSEPORT, and with the same EUID or with different EUIDs, then do listen(). We should be able to bind only one socket having both SO_REUSEADDR and SO_REUSEPORT per EUID, which restriction is to prevent unintentional listen(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 + .../selftests/net/reuseaddr_ports_exhausted.c | 162 +++++++++++++++++++++ .../selftests/net/reuseaddr_ports_exhausted.sh | 35 +++++ 4 files changed, 200 insertions(+) create mode 100644 tools/testing/selftests/net/reuseaddr_ports_exhausted.c create mode 100755 tools/testing/selftests/net/reuseaddr_ports_exhausted.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ecc52d4c034d..91f9aea853b1 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -23,3 +23,4 @@ so_txtime tcp_fastopen_backup_key nettest fin_ack_lat +reuseaddr_ports_exhausted \ No newline at end of file diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 287ae916ec0b..48063fd69924 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -12,6 +12,7 @@ TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_a TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh +TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -22,6 +23,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_FILES += reuseaddr_ports_exhausted KSFT_KHDR_INSTALL := 1 include ../lib.mk diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c new file mode 100644 index 000000000000..7b01b7c2ec10 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Check if we can fully utilize 4-tuples for connect(). + * + * Rules to bind sockets to the same port when all ephemeral ports are + * exhausted. + * + * 1. if there are TCP_LISTEN sockets on the port, fail to bind. + * 2. if there are sockets without SO_REUSEADDR, fail to bind. + * 3. if SO_REUSEADDR is disabled, fail to bind. + * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled, + * succeed to bind. + * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and + * there is no socket having the both options and the same EUID, + * succeed to bind. + * 6. fail to bind. + * + * Author: Kuniyuki Iwashima + */ +#include +#include +#include +#include +#include +#include "../kselftest_harness.h" + +struct reuse_opts { + int reuseaddr[2]; + int reuseport[2]; +}; + +struct reuse_opts unreusable_opts[12] = { + {0, 0, 0, 0}, + {0, 0, 0, 1}, + {0, 0, 1, 0}, + {0, 0, 1, 1}, + {0, 1, 0, 0}, + {0, 1, 0, 1}, + {0, 1, 1, 0}, + {0, 1, 1, 1}, + {1, 0, 0, 0}, + {1, 0, 0, 1}, + {1, 0, 1, 0}, + {1, 0, 1, 1}, +}; + +struct reuse_opts reusable_opts[4] = { + {1, 1, 0, 0}, + {1, 1, 0, 1}, + {1, 1, 1, 0}, + {1, 1, 1, 1}, +}; + +int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) +{ + struct sockaddr_in local_addr; + int len = sizeof(local_addr); + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(-1, fd) TH_LOG("failed to open socket."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT."); + + local_addr.sin_family = AF_INET; + local_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + local_addr.sin_port = 0; + + if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) { + close(fd); + return -1; + } + + return fd; +} + +TEST(reuseaddr_ports_exhausted_unreusable) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 12; i++) { + opts = &unreusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind."); + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_same_euid) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + + if (opts->reuseport[0] && opts->reuseport[1]) { + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + } else { + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_different_euid) +{ + struct reuse_opts *opts; + int i, j, ret, fd[2]; + uid_t euid[2] = {10, 20}; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) { + ret = seteuid(euid[j]); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]); + + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ret = seteuid(0); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0."); + } + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid."); + + if (fd[1] != -1) { + ret = listen(fd[0], 5); + ASSERT_EQ(0, ret) TH_LOG("failed to listen."); + + ret = listen(fd[1], 5); + EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh new file mode 100755 index 000000000000..20e3a2913d06 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run tests when all ephemeral ports are exhausted. +# +# Author: Kuniyuki Iwashima + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_local_port_range="32768 32768" \ + > /dev/null 2>&1 + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted +} + +do_test +echo "tests done" -- cgit v1.2.3 From 7e934cf5ace1dceeb804f7493fa28bb697ed3c52 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Mar 2020 17:29:11 -0400 Subject: xarray: Fix early termination of xas_for_each_marked xas_for_each_marked() is using entry == NULL as a termination condition of the iteration. When xas_for_each_marked() is used protected only by RCU, this can however race with xas_store(xas, NULL) in the following way: TASK1 TASK2 page_cache_delete() find_get_pages_range_tag() xas_for_each_marked() xas_find_marked() off = xas_find_chunk() xas_store(&xas, NULL) xas_init_marks(&xas); ... rcu_assign_pointer(*slot, NULL); entry = xa_entry(off); And thus xas_for_each_marked() terminates prematurely possibly leading to missed entries in the iteration (translating to missing writeback of some pages or a similar problem). If we find a NULL entry that has been marked, skip it (unless we're trying to allocate an entry). Reported-by: Jan Kara CC: stable@vger.kernel.org Fixes: ef8e5717db01 ("page cache: Convert delete_batch to XArray") Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 6 +- lib/xarray.c | 2 + tools/testing/radix-tree/Makefile | 4 +- tools/testing/radix-tree/iteration_check_2.c | 87 ++++++++++++++++++++++++++++ tools/testing/radix-tree/main.c | 1 + tools/testing/radix-tree/test.h | 1 + 6 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 tools/testing/radix-tree/iteration_check_2.c (limited to 'tools') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a491653d8882..14c893433139 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1648,6 +1648,7 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { struct xa_node *node = xas->xa_node; + void *entry; unsigned int offset; if (unlikely(xas_not_node(node) || node->shift)) @@ -1659,7 +1660,10 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, return NULL; if (offset == XA_CHUNK_SIZE) return xas_find_marked(xas, max, mark); - return xa_entry(xas->xa, node, offset); + entry = xa_entry(xas->xa, node, offset); + if (!entry) + return xas_find_marked(xas, max, mark); + return entry; } /* diff --git a/lib/xarray.c b/lib/xarray.c index f448bcd263ac..e9e641d3c0c3 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1208,6 +1208,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) } entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK)) + continue; if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 397d6b612502..aa6abfe0749c 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -7,8 +7,8 @@ LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ - regression4.o \ - tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o + regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ + iteration_check_2.o benchmark.o ifndef SHIFT SHIFT=3 diff --git a/tools/testing/radix-tree/iteration_check_2.c b/tools/testing/radix-tree/iteration_check_2.c new file mode 100644 index 000000000000..aac5c50a3674 --- /dev/null +++ b/tools/testing/radix-tree/iteration_check_2.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * iteration_check_2.c: Check that deleting a tagged entry doesn't cause + * an RCU walker to finish early. + * Copyright (c) 2020 Oracle + * Author: Matthew Wilcox + */ +#include +#include "test.h" + +static volatile bool test_complete; + +static void *iterator(void *arg) +{ + XA_STATE(xas, arg, 0); + void *entry; + + rcu_register_thread(); + + while (!test_complete) { + xas_set(&xas, 0); + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) + ; + rcu_read_unlock(); + assert(xas.xa_index >= 100); + } + + rcu_unregister_thread(); + return NULL; +} + +static void *throbber(void *arg) +{ + struct xarray *xa = arg; + + rcu_register_thread(); + + while (!test_complete) { + int i; + + for (i = 0; i < 100; i++) { + xa_store(xa, i, xa_mk_value(i), GFP_KERNEL); + xa_set_mark(xa, i, XA_MARK_0); + } + for (i = 0; i < 100; i++) + xa_erase(xa, i); + } + + rcu_unregister_thread(); + return NULL; +} + +void iteration_test2(unsigned test_duration) +{ + pthread_t threads[2]; + DEFINE_XARRAY(array); + int i; + + printv(1, "Running iteration test 2 for %d seconds\n", test_duration); + + test_complete = false; + + xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL); + xa_set_mark(&array, 100, XA_MARK_0); + + if (pthread_create(&threads[0], NULL, iterator, &array)) { + perror("create iterator thread"); + exit(1); + } + if (pthread_create(&threads[1], NULL, throbber, &array)) { + perror("create throbber thread"); + exit(1); + } + + sleep(test_duration); + test_complete = true; + + for (i = 0; i < 2; i++) { + if (pthread_join(threads[i], NULL)) { + perror("pthread_join"); + exit(1); + } + } + + xa_destroy(&array); +} diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 7a22d6e3732e..f2cbc8e5b97c 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -311,6 +311,7 @@ int main(int argc, char **argv) regression4_test(); iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); + iteration_test2(10 + 90 * long_run); single_thread_tests(long_run); /* Free any remaining preallocated nodes */ diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 1ee4b2c0ad10..34dab4d18744 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -34,6 +34,7 @@ void xarray_tests(void); void tag_check(void); void multiorder_checks(void); void iteration_test(unsigned order, unsigned duration); +void iteration_test2(unsigned duration); void benchmark(void); void idr_checks(void); void ida_tests(void); -- cgit v1.2.3 From 14e5728ff8176d4d5924b0bf5ab9b1c94d6b3381 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Mar 2020 11:23:30 -0700 Subject: bpftool: Only build bpftool-prog-profile if supported by clang bpftool-prog-profile requires clang to generate BTF for global variables. When compared with older clang, skip this command. This is achieved by adding a new feature, clang-bpf-global-var, to tools/build/feature. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200312182332.3953408-2-songliubraving@fb.com --- tools/bpf/bpftool/Makefile | 15 +++++++++++---- tools/bpf/bpftool/prog.c | 1 + tools/build/feature/Makefile | 9 ++++++++- tools/build/feature/test-clang-bpf-global-var.c | 4 ++++ 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 tools/build/feature/test-clang-bpf-global-var.c (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f294f6c1e795..b0574751f231 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -62,8 +62,9 @@ RM ?= rm -f CLANG ?= clang FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib -FEATURE_DISPLAY = libbfd disassembler-four-args zlib +FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \ + clang-bpf-global-var +FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -113,6 +114,12 @@ endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o _OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o +ifeq ($(feature-clang-bpf-global-var),1) + __OBJS = $(OBJS) +else + __OBJS = $(_OBJS) +endif + $(OUTPUT)_prog.o: prog.c $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< @@ -136,8 +143,8 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) +$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 576ddd82bc96..925c6c66aad7 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1545,6 +1545,7 @@ static int do_loadall(int argc, char **argv) static int do_profile(int argc, char **argv) { + p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0"); return 0; } diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 7ac0d8088565..ab8e89a7009c 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -67,7 +67,8 @@ FILES= \ test-llvm.bin \ test-llvm-version.bin \ test-libaio.bin \ - test-libzstd.bin + test-libzstd.bin \ + test-clang-bpf-global-var.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config +CLANG ?= clang all: $(FILES) @@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd +$(OUTPUT)test-clang-bpf-global-var.bin: + $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ + grep BTF_KIND_VAR + + ############################### clean: diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c new file mode 100644 index 000000000000..221f1481d52e --- /dev/null +++ b/tools/build/feature/test-clang-bpf-global-var.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +volatile int global_value_for_test = 1; -- cgit v1.2.3 From 39be909c38a42543239de97087d3c93ea9272864 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Mar 2020 11:23:31 -0700 Subject: bpftool: Skeleton should depend on libbpf Add the dependency to libbpf, to fix build errors like: In file included from skeleton/profiler.bpf.c:5: .../bpf_helpers.h:5:10: fatal error: 'bpf_helper_defs.h' file not found #include "bpf_helper_defs.h" ^~~~~~~~~~~~~~~~~~~ 1 error generated. make: *** [skeleton/profiler.bpf.o] Error 1 make: *** Waiting for unfinished jobs.... Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Suggested-by: Quentin Monnet Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200312182332.3953408-3-songliubraving@fb.com --- tools/bpf/bpftool/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index b0574751f231..9ca3bfbb9ac4 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -126,11 +126,12 @@ $(OUTPUT)_prog.o: prog.c $(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) -skeleton/profiler.bpf.o: skeleton/profiler.bpf.c +skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(srctree)/tools/include/uapi/ \ -I$(srctree)/tools/testing/selftests/bpf/include/uapi \ - -I$(srctree)/tools/lib -g -O2 -target bpf -c $< -o $@ + -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -g -O2 -target bpf -c $< -o $@ profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ -- cgit v1.2.3 From 8d830f549dbd2f9cc4656a17704a2896682cbb5b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Mar 2020 11:23:32 -0700 Subject: bpftool: Add _bpftool and profiler.skel.h to .gitignore These files are generated, so ignore them. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200312182332.3953408-4-songliubraving@fb.com --- tools/bpf/bpftool/.gitignore | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index b13926432b84..8d6e8901ed2b 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,7 +1,9 @@ *.d +/_bpftool /bpftool bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf +profiler.skel.h -- cgit v1.2.3 From b35f14f410416f06ec54d187dedc372405757290 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 12 Mar 2020 11:50:33 -0700 Subject: libbpf: Split BTF presence checks into libbpf- and kernel-specific parts Needs for application BTF being present differs between user-space libbpf needs and kernel needs. Currently, BTF is mandatory only in kernel only when BPF application is using STRUCT_OPS. While libbpf itself relies more heavily on presense of BTF: - for BTF-defined maps; - for Kconfig externs; - for STRUCT_OPS as well. Thus, checks for presence and validness of bpf_object's BPF needs to be performed separately, which is patch does. Fixes: 5327644614a1 ("libbpf: Relax check whether BTF is mandatory") Reported-by: Michal Rostecki Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Cc: Quentin Monnet Link: https://lore.kernel.org/bpf/20200312185033.736911-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 223be01dc466..1a787a2faf58 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2284,9 +2284,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) } } -static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) +static bool libbpf_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; +} + +static bool kernel_needs_btf(const struct bpf_object *obj) +{ + return obj->efile.st_ops_shndx >= 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -2322,7 +2329,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err && bpf_object__is_btf_mandatory(obj)) { + if (err && libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return err; } @@ -2346,7 +2353,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) btf_ext__free(obj->btf_ext); obj->btf_ext = NULL; - if (bpf_object__is_btf_mandatory(obj)) { + if (libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return -ENOENT; } @@ -2410,7 +2417,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) obj->btf_ext = NULL; } - if (bpf_object__is_btf_mandatory(obj)) + if (kernel_needs_btf(obj)) return err; } return 0; -- cgit v1.2.3 From 75a1e792c335b5c6d7fdb1014da47aeb64c5944f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 12 Mar 2020 18:46:07 +0000 Subject: tools: bpftool: Allow all prog/map handles for pinning objects Documentation and interactive help for bpftool have always explained that the regular handles for programs (id|name|tag|pinned) and maps (id|name|pinned) can be passed to the utility when attempting to pin objects (bpftool prog pin PROG / bpftool map pin MAP). THIS IS A LIE!! The tool actually accepts only ids, as the parsing is done in do_pin_any() in common.c instead of reusing the parsing functions that have long been generic for program and map handles. Instead of fixing the doc, fix the code. It is trivial to reuse the generic parsing, and to simplify do_pin_any() in the process. Do not accept to pin multiple objects at the same time with prog_parse_fds() or map_parse_fds() (this would require a more complex syntax for passing multiple sysfs paths and validating that they correspond to the number of e.g. programs we find for a given name or tag). Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200312184608.12050-2-quentin@isovalent.com --- tools/bpf/bpftool/common.c | 33 ++++----------------------------- tools/bpf/bpftool/main.h | 2 +- tools/bpf/bpftool/map.c | 2 +- tools/bpf/bpftool/prog.c | 2 +- 4 files changed, 7 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b75b8ec5469c..ad634516ba80 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name) return err; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) { - unsigned int id; - char *endptr; int err; int fd; - if (argc < 3) { - p_err("too few arguments, id ID and FILE path is required"); - return -1; - } else if (argc > 3) { - p_err("too many arguments"); - return -1; - } - - if (!is_prefix(*argv, "id")) { - p_err("expected 'id' got %s", *argv); - return -1; - } - NEXT_ARG(); - - id = strtoul(*argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", *argv); - return -1; - } - NEXT_ARG(); - - fd = get_fd_by_id(id); - if (fd < 0) { - p_err("can't open object by id (%u): %s", id, strerror(errno)); - return -1; - } + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; err = do_pin_fd(fd, *argv); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 724ef9d941d3..d57972dd0f2b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -146,7 +146,7 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path, bool quiet); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int mount_bpffs_for_pin(const char *name); -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e6c85680b34d..693a632f6813 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_map_get_fd_by_id); + err = do_pin_any(argc, argv, map_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 925c6c66aad7..d0966380ad0e 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -813,7 +813,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id); + err = do_pin_any(argc, argv, prog_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; -- cgit v1.2.3 From 132c1af0a23d049e2ec93eb6a180d9de71d3a32f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 12 Mar 2020 18:46:08 +0000 Subject: tools: bpftool: Fix minor bash completion mistakes Minor fixes for bash completion: addition of program name completion for two subcommands, and correction for program test-runs and map pinning. The completion for the following commands is fixed or improved: # bpftool prog run [TAB] # bpftool prog pin [TAB] # bpftool map pin [TAB] # bpftool net attach xdp name [TAB] Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200312184608.12050-3-quentin@isovalent.com --- tools/bpf/bpftool/bash-completion/bpftool | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index a9cce9d3745a..9b0534f558f1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -542,8 +542,8 @@ _bpftool() esac ;; run) - if [[ ${#words[@]} -lt 5 ]]; then - _filedir + if [[ ${#words[@]} -eq 4 ]]; then + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 fi case $prev in @@ -551,6 +551,10 @@ _bpftool() _bpftool_get_prog_ids return 0 ;; + name) + _bpftool_get_prog_names + return 0 + ;; data_in|data_out|ctx_in|ctx_out) _filedir return 0 @@ -756,11 +760,17 @@ _bpftool() esac ;; pin) - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) - else - _filedir - fi + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + ;; + id) + _bpftool_get_map_ids + ;; + name) + _bpftool_get_map_names + ;; + esac return 0 ;; event_pipe) @@ -887,7 +897,7 @@ _bpftool() case $command in skeleton) _filedir - ;; + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) @@ -987,6 +997,9 @@ _bpftool() id) _bpftool_get_prog_ids ;; + name) + _bpftool_get_prog_names + ;; pinned) _filedir ;; -- cgit v1.2.3 From b4490c5c4e023f09b7d27c9a9d3e7ad7d09ea6bf Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:56 -0300 Subject: bpf: Added new helper bpf_get_ns_current_pid_tgid New bpf helper bpf_get_ns_current_pid_tgid, This helper will return pid and tgid from current task which namespace matches dev_t and inode number provided, this will allows us to instrument a process inside a container. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200304204157.58695-3-cneirabustos@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 20 ++++++++++++++++++- kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 45 ++++++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 2 ++ scripts/bpf_helpers_doc.py | 1 + tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++- 7 files changed, 88 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fd91b7c95ea..4ec835334a1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1497,6 +1497,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 40b2d9476268..15b239da775b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2914,6 +2914,19 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3035,7 +3048,8 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3829,4 +3843,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 973a20d49749..0f9ca46e1978 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2149,6 +2149,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d8b7b110a1c5..01878db15eaf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "../../lib/kstrtox.h" @@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg4_type = ARG_PTR_TO_LONG, }; #endif + +BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, + struct bpf_pidns_info *, nsdata, u32, size) +{ + struct task_struct *task = current; + struct pid_namespace *pidns; + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_pidns_info))) + goto clear; + + if (unlikely((u64)(dev_t)dev != dev)) + goto clear; + + if (unlikely(!task)) + goto clear; + + pidns = task_active_pid_ns(task); + if (unlikely(!pidns)) { + err = -ENOENT; + goto clear; + } + + if (!ns_match(&pidns->ns, (dev_t)dev, ino)) + goto clear; + + nsdata->pid = task_pid_nr_ns(task, pidns); + nsdata->tgid = task_tgid_nr_ns(task, pidns); + return 0; +clear: + memset((void *)nsdata, 0, (size_t) size); + return err; +} + +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { + .func = bpf_get_ns_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6a490d8ce9de..b5071c7e93ca 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_send_signal_thread_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_get_ns_current_pid_tgid: + return &bpf_get_ns_current_pid_tgid_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index cebed6fb5bbb..c1e2b5410faa 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -435,6 +435,7 @@ class PrinterHelpers(Printer): 'struct bpf_fib_lookup', 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', 'struct bpf_sock', 'struct bpf_sock_addr', 'struct bpf_sock_ops', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 40b2d9476268..15b239da775b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2914,6 +2914,19 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3035,7 +3048,8 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3829,4 +3843,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 1c1052e0140af8f211c283c0a333ecff2a6edfc9 Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:57 -0300 Subject: tools/testing/selftests/bpf: Add self-tests for new helper bpf_get_ns_current_pid_tgid. Self tests added for new helper bpf_get_ns_current_pid_tgid Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200304204157.58695-4-cneirabustos@gmail.com --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/ns_current_pid_tgid.c | 88 ++++++++++++ .../selftests/bpf/progs/test_ns_current_pid_tgid.c | 37 +++++ .../selftests/bpf/test_current_pid_tgid_new_ns.c | 159 +++++++++++++++++++++ 5 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c create mode 100644 tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c create mode 100644 tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index ec464859c6b6..2198cd876675 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -31,6 +31,7 @@ test_tcp_check_syncookie_user test_sysctl test_hashmap test_btf_dump +test_current_pid_tgid_new_ns xdping test_cpp *.skel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ee4ad34adb4a..da4389dde9f7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_progs-no_alu32 + test_progs-no_alu32 \ + test_current_pid_tgid_new_ns # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c new file mode 100644 index 000000000000..542240e16564 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#include +#include +#include +#include +#include + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +void test_ns_current_pid_tgid(void) +{ + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + int err, key = 0, duration = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { + perror("Failed to stat /proc/self/ns/pid"); + goto cleanup; + } + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c new file mode 100644 index 000000000000..1dca70a6de2f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */ + +#include +#include +#include + +static volatile struct { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +} res; + +SEC("raw_tracepoint/sys_enter") +int trace(void *ctx) +{ + __u64 ns_pid_tgid, expected_pid; + struct bpf_pidns_info nsdata; + __u32 key = 0; + + if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata, + sizeof(struct bpf_pidns_info))) + return 0; + + ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid; + expected_pid = res.user_pid_tgid; + + if (expected_pid != ns_pid_tgid) + return 0; + + res.pid_tgid = ns_pid_tgid; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c new file mode 100644 index 000000000000..ed253f252cd0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "test_progs.h" + +#define CHECK_NEWNS(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + printf("%s:PASS:%s\n", __func__, tag); \ + } \ + __ret; \ +}) + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +int main(int argc, char **argv) +{ + pid_t pid; + int exit_code = 1; + struct stat st; + + printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n"); + + if (stat("/proc/self/ns/pid", &st)) { + perror("stat failed on /proc/self/ns/pid ns\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS), + "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno)) + return exit_code; + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + + usleep(5); + waitpid(pid, &status, 0); + return 0; + } else { + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + waitpid(pid, &status, 0); + return 0; + } else { + if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL), + "Unmounting proc", "Cannot umount proc! errno=%d\n", errno)) + return exit_code; + + if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL), + "Mounting proc", "Cannot mount proc! errno=%d\n", errno)) + return exit_code; + + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + int exit_code = 1; + int err, key = 0; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return exit_code; + + err = bpf_object__load(obj); + if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st), + "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno)) + goto cleanup; + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; + + exit_code = 0; + printf("%s:PASS\n", argv[0]); +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); + } + } +} -- cgit v1.2.3 From d831ee84bfc9173eecf30dbbc2553ae81b996c60 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 6 Mar 2020 08:59:23 +0000 Subject: bpf: Add bpf_xdp_output() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce new helper that reuses existing xdp perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct xdp_buff *' as a tracepoint argument. Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial --- include/uapi/linux/bpf.h | 26 ++++++++++- kernel/bpf/verifier.c | 4 +- kernel/trace/bpf_trace.c | 3 ++ net/core/filter.c | 16 ++++++- tools/include/uapi/linux/bpf.h | 26 ++++++++++- .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 53 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 24 ++++++++++ 7 files changed, 148 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 55d376c53f7d..745f3cfdf3b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && func_id != BPF_FUNC_skb_output && - func_id != BPF_FUNC_perf_event_read_value) + func_id != BPF_FUNC_perf_event_read_value && + func_id != BPF_FUNC_xdp_output) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: case BPF_FUNC_skb_output: + case BPF_FUNC_xdp_output: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b5071c7e93ca..e619eedb5919 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { }; extern const struct bpf_func_proto bpf_skb_output_proto; +extern const struct bpf_func_proto bpf_xdp_output_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; + case BPF_FUNC_xdp_output: + return &bpf_xdp_output_proto; #endif default: return raw_tp_prog_func_proto(func_id, prog); diff --git a/net/core/filter.c b/net/core/filter.c index cd0a532db4e7..22219544410f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + if (unlikely(!xdp || + xdp_size > (unsigned long)(xdp->data_end - xdp->data))) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, xdp->data, @@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +static int bpf_xdp_output_btf_ids[5]; +const struct bpf_func_proto bpf_xdp_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_xdp_output_btf_ids, +}; + BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? sock_gen_cookie(skb->sk) : 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 4ba011031d4c..a0f688c37023 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -4,17 +4,51 @@ #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" +struct meta { + int ifindex; + int pkt_len; +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int duration = 0; + struct meta *meta = (struct meta *)data; + struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + + if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), + "check_size", "size %u < %zu\n", + size, sizeof(pkt_v4) + sizeof(*meta))) + return; + + if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + return; + + if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", + "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + return; + + if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), + "check_packet_content", "content not the same\n")) + return; + + *(bool *)ctx = true; +} + void test_xdp_bpf2bpf(void) { __u32 duration = 0, retval, size; char buf[128]; int err, pkt_fd, map_fd; + bool passed = false; struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; + struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void) if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) goto out; + /* Set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto out; + /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); @@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void) err, errno, retval, size)) goto out; + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out; + + CHECK_FAIL(!passed); + /* Verify test results */ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), "result", "fentry failed err %llu\n", @@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void) "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); out: + if (pb) + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 42dd2fedd588..a038e827f850 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -3,6 +3,8 @@ #include #include +char _license[] SEC("license") = "GPL"; + struct net_device { /* Structure does not need to contain all entries, * as "preserve_access_index" will use BTF to fix this... @@ -27,10 +29,32 @@ struct xdp_buff { struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); +struct meta { + int ifindex; + int pkt_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + __u64 test_result_fentry = 0; SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { + struct meta meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + meta.ifindex = xdp->rxq->dev->ifindex; + meta.pkt_len = data_end - data; + bpf_xdp_output(xdp, &perf_buf_map, + ((__u64) meta.pkt_len << 32) | + BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + test_result_fentry = xdp->rxq->dev->ifindex; return 0; } -- cgit v1.2.3 From b2bf6660709c3bdd92d7558f4aa3cf07c0b0dda8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 12 Mar 2020 17:56:02 -0700 Subject: perf test: Print if shell directory isn't present If the shell test directory isn't present the exit code will be 255 but with no error messages printed. Add an error message. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200313005602.45236-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5f05db75cdd8..54d9516c9839 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -543,8 +543,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) return -1; dir = opendir(st.dir); - if (!dir) + if (!dir) { + pr_err("failed to open shell test directory: %s\n", + st.dir); return -1; + } for_each_shell_test(dir, st.dir, ent) { int curr = i++; -- cgit v1.2.3 From 14f4283aa3e69a68e0a2e3bdb8a651a39bf18c52 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 12 Feb 2020 10:38:29 +0000 Subject: selftests/resctrl: fix spelling mistake "Errror" -> "Error" There are two spelling mistakes in error messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan --- tools/testing/selftests/resctrl/fill_buf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 84d2a8b9657a..79c611c99a3d 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -181,7 +181,7 @@ fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush, ret = fill_cache_write(start_ptr, end_ptr, resctrl_val); if (ret) { - printf("\n Errror in fill cache read/write...\n"); + printf("\n Error in fill cache read/write...\n"); return -1; } @@ -205,7 +205,7 @@ int run_fill_buf(unsigned long span, int malloc_and_init_memory, ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op, resctrl_val); if (ret) { - printf("\n Errror in fill cache\n"); + printf("\n Error in fill cache\n"); return -1; } -- cgit v1.2.3 From 785c4e834f5f34dc00a398e935a89fd38416ff64 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Mar 2020 11:17:05 +0800 Subject: selftests/timens: Remove duplicated include Remove duplicated include. Signed-off-by: YueHaibing Reviewed-by: Dmitry Safonov Signed-off-by: Shuah Khan --- tools/testing/selftests/timens/exec.c | 1 - tools/testing/selftests/timens/procfs.c | 1 - tools/testing/selftests/timens/timens.c | 1 - tools/testing/selftests/timens/timer.c | 1 - 4 files changed, 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index 87b47b557a7a..e40dc5be2f66 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "log.h" diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index 43d93f4006b9..7f14f0fdac84 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "log.h" #include "timens.h" diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c index 559d26e21ba0..098be7c83be3 100644 --- a/tools/testing/selftests/timens/timens.c +++ b/tools/testing/selftests/timens/timens.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "log.h" diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 0cca7aafc4bd..96dba11ebe44 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "log.h" #include "timens.h" -- cgit v1.2.3 From 9c249ec312dbdb3ef05f5b2672c8c1d1f7562269 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 12 Mar 2020 13:03:37 +0900 Subject: selftests/ftrace: Fix typo in trigger-multihist.tc This patch fix a spelling typo in trigger-multihist.tc Signed-off-by: Masanari Iida Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 18fdaab9f570..68ff3f45c720 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -23,7 +23,7 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then exit_unsupported fi -echo "Test histogram multiple tiggers" +echo "Test histogram multiple triggers" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger -- cgit v1.2.3 From 1ae81d78a8b2f6314fb90dc4296202611e710b37 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Mar 2020 12:21:28 -0700 Subject: selftests/seccomp: Adjust test fixture counts The seccomp selftest reported the wrong test counts since it was using slightly the wrong API for defining text fixtures. Adjust the API usage. Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index ee1b727ede04..7bf82fb07f67 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -909,7 +909,7 @@ TEST(ERRNO_order) EXPECT_EQ(12, errno); } -FIXTURE_DATA(TRAP) { +FIXTURE(TRAP) { struct sock_fprog prog; }; @@ -1020,7 +1020,7 @@ TEST_F(TRAP, handler) EXPECT_NE(0, (unsigned long)sigsys->_call_addr); } -FIXTURE_DATA(precedence) { +FIXTURE(precedence) { struct sock_fprog allow; struct sock_fprog log; struct sock_fprog trace; @@ -1509,7 +1509,7 @@ void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, EXPECT_EQ(0, ret); } -FIXTURE_DATA(TRACE_poke) { +FIXTURE(TRACE_poke) { struct sock_fprog prog; pid_t tracer; long poked; @@ -1817,7 +1817,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, change_syscall(_metadata, tracee, -1, -ESRCH); } -FIXTURE_DATA(TRACE_syscall) { +FIXTURE(TRACE_syscall) { struct sock_fprog prog; pid_t tracer, mytid, mypid, parent; }; @@ -2321,7 +2321,7 @@ struct tsync_sibling { } \ } while (0) -FIXTURE_DATA(TSYNC) { +FIXTURE(TSYNC) { struct sock_fprog root_prog, apply_prog; struct tsync_sibling sibling[TSYNC_SIBLINGS]; sem_t started; -- cgit v1.2.3 From 29e911ef7b706215caf02a82b0d3076611d6abe8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 26 Feb 2020 15:54:49 -0700 Subject: selftests: Fix kselftest O=objdir build from cluttering top level objdir make kselftest-all O=objdir builds create generated objects in objdir. This clutters the top level directory with kselftest objects. Fix it to create sub-directory under objdir for kselftest objects. Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 63430e2664c2..be22dbe94a4c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -85,7 +85,7 @@ override LDFLAGS = override MAKEFLAGS = endif -# Append kselftest to KBUILD_OUTPUT to avoid cluttering +# Append kselftest to KBUILD_OUTPUT and O to avoid cluttering # KBUILD_OUTPUT with selftest objects and headers installed # by selftests Makefile or lib.mk. ifdef building_out_of_srctree @@ -93,7 +93,7 @@ override LDFLAGS = endif ifneq ($(O),) - BUILD := $(O) + BUILD := $(O)/kselftest else ifneq ($(KBUILD_OUTPUT),) BUILD := $(KBUILD_OUTPUT)/kselftest -- cgit v1.2.3 From 1dc74544edc6753d628fcd1059559fb0ddc1b422 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Mar 2020 14:44:21 -0700 Subject: selftests: android: ion: Fix ionmap_test compile error ionmap_test compile rule is missing ipcsocket.c dependency. Add it to fix the following compile errors: ..android/ion/ionutils.c:221: undefined reference to `sendtosocket' ..android/ion/ionutils.c:243: undefined reference to `receivefromsocket' Signed-off-by: Shuah Khan --- tools/testing/selftests/android/ion/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile index 0eb7ab626e1c..42b71f005332 100644 --- a/tools/testing/selftests/android/ion/Makefile +++ b/tools/testing/selftests/android/ion/Makefile @@ -17,4 +17,4 @@ include ../../lib.mk $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c -$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c +$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c -- cgit v1.2.3 From fb0bb3952401eac4e5a39551239f4e3bc9aeee4a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Mar 2020 14:55:51 -0700 Subject: selftests: android: Fix custom install from skipping test progs Update custom install rule to install all generated test programs. This fixes android/ion tests to be installed correctly. Signed-off-by: Shuah Khan --- tools/testing/selftests/android/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index 7c462714b418..9258306cafe9 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -21,7 +21,7 @@ all: override define INSTALL_RULE mkdir -p $(INSTALL_PATH) - install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) +install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) @for SUBDIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \ -- cgit v1.2.3 From 6ae32b29c09ba9b99c4c7317eed029587bd2706d Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 11 Mar 2020 02:12:05 +0000 Subject: tools: bpftool: Restore message on failure to guess program type In commit 4a3d6c6a6e4d ("libbpf: Reduce log level for custom section names"), log level for messages for libbpf_attach_type_by_name() and libbpf_prog_type_by_name() was downgraded from "info" to "debug". The latter function, in particular, is used by bpftool when attempting to load programs, and this change caused bpftool to exit with no hint or error message when it fails to detect the type of the program to load (unless "-d" option was provided). To help users understand why bpftool fails to load the program, let's do a second run of the function with log level in "debug" mode in case of failure. Before: # bpftool prog load sample_ret0.o /sys/fs/bpf/sample_ret0 # echo $? 255 Or really verbose with -d flag: # bpftool -d prog load sample_ret0.o /sys/fs/bpf/sample_ret0 libbpf: loading sample_ret0.o libbpf: section(1) .strtab, size 134, link 0, flags 0, type=3 libbpf: skip section(1) .strtab libbpf: section(2) .text, size 16, link 0, flags 6, type=1 libbpf: found program .text libbpf: section(3) .debug_abbrev, size 55, link 0, flags 0, type=1 libbpf: skip section(3) .debug_abbrev libbpf: section(4) .debug_info, size 75, link 0, flags 0, type=1 libbpf: skip section(4) .debug_info libbpf: section(5) .rel.debug_info, size 32, link 14, flags 0, type=9 libbpf: skip relo .rel.debug_info(5) for section(4) libbpf: section(6) .debug_str, size 150, link 0, flags 30, type=1 libbpf: skip section(6) .debug_str libbpf: section(7) .BTF, size 155, link 0, flags 0, type=1 libbpf: section(8) .BTF.ext, size 80, link 0, flags 0, type=1 libbpf: section(9) .rel.BTF.ext, size 32, link 14, flags 0, type=9 libbpf: skip relo .rel.BTF.ext(9) for section(8) libbpf: section(10) .debug_frame, size 40, link 0, flags 0, type=1 libbpf: skip section(10) .debug_frame libbpf: section(11) .rel.debug_frame, size 16, link 14, flags 0, type=9 libbpf: skip relo .rel.debug_frame(11) for section(10) libbpf: section(12) .debug_line, size 74, link 0, flags 0, type=1 libbpf: skip section(12) .debug_line libbpf: section(13) .rel.debug_line, size 16, link 14, flags 0, type=9 libbpf: skip relo .rel.debug_line(13) for section(12) libbpf: section(14) .symtab, size 96, link 1, flags 0, type=2 libbpf: looking for externs among 4 symbols... libbpf: collected 0 externs total libbpf: failed to guess program type from ELF section '.text' libbpf: supported section(type) names are: socket sk_reuseport kprobe/ [...] After: # bpftool prog load sample_ret0.o /sys/fs/bpf/sample_ret0 libbpf: failed to guess program type from ELF section '.text' libbpf: supported section(type) names are: socket sk_reuseport kprobe/ [...] Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200311021205.9755-1-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/common.c | 7 +++++++ tools/bpf/bpftool/main.c | 7 ------- tools/bpf/bpftool/main.h | 5 +++++ tools/bpf/bpftool/prog.c | 27 +++++++++++++++++++++++---- 4 files changed, 35 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index ad634516ba80..f2223dbdfb0a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -572,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what) return 0; } + +int __printf(2, 0) +print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 6d41bbfc6459..06449e846e4b 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -79,13 +79,6 @@ static int do_version(int argc, char **argv) return 0; } -static int __printf(2, 0) -print_all_levels(__maybe_unused enum libbpf_print_level level, - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} - int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)) { diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d57972dd0f2b..5f6dccd43622 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -14,6 +14,8 @@ #include #include +#include + #include "json_writer.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -229,4 +231,7 @@ struct tcmsg; int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, const char *devname, int ifindex); + +int print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args); #endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d0966380ad0e..f6a5974a7b0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1247,6 +1247,25 @@ free_data_in: return err; } +static int +get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + libbpf_print_fn_t print_backup; + int ret; + + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + if (!ret) + return ret; + + /* libbpf_prog_type_by_name() failed, let's re-run with debug level */ + print_backup = libbpf_set_print(print_all_levels); + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + libbpf_set_print(print_backup); + + return ret; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1296,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) strcat(type, *argv); strcat(type, "/"); - err = libbpf_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); free(type); if (err < 0) goto err_free_reuse_maps; @@ -1396,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (prog_type == BPF_PROG_TYPE_UNSPEC) { const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &prog_type, - &expected_attach_type); + err = get_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); if (err < 0) goto err_close_obj; } -- cgit v1.2.3 From 2b5cf9fb74848fe5742a56e872e6847b79933c0b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Mar 2020 11:53:45 -0700 Subject: selftests/bpf: Guarantee that useep() calls nanosleep() syscall Some implementations of C runtime library won't call nanosleep() syscall from usleep(). But a bunch of kprobe/tracepoint selftests rely on nanosleep being called to trigger them. To make this more reliable, "override" usleep implementation and call nanosleep explicitly. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Cc: Julia Kartseva Link: https://lore.kernel.org/bpf/20200311185345.3874602-1-andriin@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index a969c77e9456..2b0bc1171c9c 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -29,6 +29,15 @@ struct prog_test_def { int old_error_cnt; }; +/* Override C runtime library's usleep() implementation to ensure nanosleep() + * is always called. Usleep is frequently used in selftests as a way to + * trigger kprobe and tracepoints. + */ +int usleep(useconds_t usec) +{ + return syscall(__NR_nanosleep, usec * 1000UL); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; -- cgit v1.2.3 From 4cd729fa022cb5142e5b65f25589af61c8148cf6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Mar 2020 15:27:49 -0700 Subject: selftests/bpf: Make tcp_rtt test more robust to failures Switch to non-blocking accept and wait for server thread to exit before proceeding. I noticed that sometimes tcp_rtt server thread failure would "spill over" into other tests (that would run after tcp_rtt), probably just because server thread exits much later and tcp_rtt doesn't wait for it. v1->v2: - add usleep() while waiting on initial non-blocking accept() (Stanislav); Fixes: 8a03222f508b ("selftests/bpf: test_progs: fix client/server race in tcp_rtt") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20200311222749.458015-1-andriin@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index f4cd60d6fba2..e08f6bb17700 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -188,7 +188,7 @@ static int start_server(void) }; int fd; - fd = socket(AF_INET, SOCK_STREAM, 0); + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -205,6 +205,7 @@ static int start_server(void) static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; +static volatile bool server_done = false; static void *server_thread(void *arg) { @@ -222,23 +223,24 @@ static void *server_thread(void *arg) if (CHECK_FAIL(err < 0)) { perror("Failed to listed on socket"); - return NULL; + return ERR_PTR(err); } - client_fd = accept(fd, (struct sockaddr *)&addr, &len); + while (!server_done) { + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (client_fd == -1 && errno == EAGAIN) { + usleep(50); + continue; + } + break; + } if (CHECK_FAIL(client_fd < 0)) { perror("Failed to accept client"); - return NULL; + return ERR_PTR(err); } - /* Wait for the next connection (that never arrives) - * to keep this thread alive to prevent calling - * close() on client_fd. - */ - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { - perror("Unexpected success in second accept"); - return NULL; - } + while (!server_done) + usleep(50); close(client_fd); @@ -249,6 +251,7 @@ void test_tcp_rtt(void) { int server_fd, cgroup_fd; pthread_t tid; + void *server_res; cgroup_fd = test__join_cgroup("/tcp_rtt"); if (CHECK_FAIL(cgroup_fd < 0)) @@ -267,6 +270,11 @@ void test_tcp_rtt(void) pthread_mutex_unlock(&server_started_mtx); CHECK_FAIL(run_test(cgroup_fd, server_fd)); + + server_done = true; + pthread_join(tid, &server_res); + CHECK_FAIL(IS_ERR(server_res)); + close_server_fd: close(server_fd); close_cgroup_fd: -- cgit v1.2.3 From 4e1fd25d19e83774e41008c1ca35c6c27eb30270 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 12 Mar 2020 23:18:37 -0700 Subject: selftests/bpf: Fix usleep() implementation nanosleep syscall expects pointer to struct timespec, not nanoseconds directly. Current implementation fulfills its purpose of invoking nanosleep syscall, but doesn't really provide sleeping capabilities, which can cause flakiness for tests relying on usleep() to wait for something. Fixes: ec12a57b822c ("selftests/bpf: Guarantee that useep() calls nanosleep() syscall") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200313061837.3685572-1-andriin@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2b0bc1171c9c..b6201dd82edf 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -35,7 +35,16 @@ struct prog_test_def { */ int usleep(useconds_t usec) { - return syscall(__NR_nanosleep, usec * 1000UL); + struct timespec ts; + + if (usec > 999999) { + ts.tv_sec = usec / 1000000; + ts.tv_nsec = usec % 1000000; + } else { + ts.tv_sec = 0; + ts.tv_nsec = usec; + } + return nanosleep(&ts, NULL); } static bool should_run(struct test_selector *sel, int num, const char *name) -- cgit v1.2.3 From bcd66b10b5e956b3e81f76a61abfed2501ff4038 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 13 Mar 2020 12:31:05 +0100 Subject: tools/bpf: Move linux/types.h for selftests and bpftool Commit fe4eb069edb7 ("bpftool: Use linux/types.h from source tree for profiler build") added a build dependency on tools/testing/selftests/bpf to tools/bpf/bpftool. This is suboptimal with respect to a possible stand-alone build of bpftool. Fix this by moving tools/testing/selftests/bpf/include/uapi/linux/types.h to tools/include/uapi/linux/types.h. This requires an adjustment in the include search path order for the tests in tools/testing/selftests/bpf so that tools/include/linux/types.h is selected when building host binaries and tools/include/uapi/linux/types.h is selected when building bpf binaries. Verified by compiling bpftool and the bpf selftests on x86_64 with this change. Fixes: fe4eb069edb7 ("bpftool: Use linux/types.h from source tree for profiler build") Suggested-by: Andrii Nakryiko Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200313113105.6918-1-tklauser@distanz.ch --- tools/bpf/bpftool/Makefile | 1 - tools/include/uapi/linux/types.h | 23 ++++++++++++++++++++++ tools/testing/selftests/bpf/Makefile | 7 ++++--- .../selftests/bpf/include/uapi/linux/types.h | 23 ---------------------- 4 files changed, 27 insertions(+), 27 deletions(-) create mode 100644 tools/include/uapi/linux/types.h delete mode 100644 tools/testing/selftests/bpf/include/uapi/linux/types.h (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 9ca3bfbb9ac4..f584d1fdfc64 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -129,7 +129,6 @@ $(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(srctree)/tools/include/uapi/ \ - -I$(srctree)/tools/testing/selftests/bpf/include/uapi \ -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ -g -O2 -target bpf -c $< -o $@ diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h new file mode 100644 index 000000000000..91fa51a9c31d --- /dev/null +++ b/tools/include/uapi/linux/types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_LINUX_TYPES_H +#define _UAPI_LINUX_TYPES_H + +#include + +/* copied from linux:include/uapi/linux/types.h */ +#define __bitwise +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + +#endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index da4389dde9f7..074a05efd1ca 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,8 +20,9 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \ -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ + -I$(APIDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -194,8 +195,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ - -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ + -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h deleted file mode 100644 index 91fa51a9c31d..000000000000 --- a/tools/testing/selftests/bpf/include/uapi/linux/types.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UAPI_LINUX_TYPES_H -#define _UAPI_LINUX_TYPES_H - -#include - -/* copied from linux:include/uapi/linux/types.h */ -#define __bitwise -typedef __u16 __bitwise __le16; -typedef __u16 __bitwise __be16; -typedef __u32 __bitwise __le32; -typedef __u32 __bitwise __be32; -typedef __u64 __bitwise __le64; -typedef __u64 __bitwise __be64; - -typedef __u16 __bitwise __sum16; -typedef __u32 __bitwise __wsum; - -#define __aligned_u64 __u64 __attribute__((aligned(8))) -#define __aligned_be64 __be64 __attribute__((aligned(8))) -#define __aligned_le64 __le64 __attribute__((aligned(8))) - -#endif /* _UAPI_LINUX_TYPES_H */ -- cgit v1.2.3 From 30b4cb36b11144e29b4f837057f8ab31aac10b8a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 13 Mar 2020 17:10:49 +0100 Subject: selftests/bpf: Fix spurious failures in accept due to EAGAIN Andrii Nakryiko reports that sockmap_listen test suite is frequently failing due to accept() calls erroring out with EAGAIN: ./test_progs:connect_accept_thread:733: accept: Resource temporarily unavailable connect_accept_thread:FAIL:733 This is because we are using a non-blocking listening TCP socket to accept() connections without polling on the socket. While at first switching to blocking mode seems like the right thing to do, this could lead to test process blocking indefinitely in face of a network issue, like loopback interface being down, as Andrii pointed out. Hence, stick to non-blocking mode for TCP listening sockets but with polling for incoming connection for a limited time before giving up. Apply this approach to all socket I/O calls in the test suite that we expect to block indefinitely, that is accept() for TCP and recv() for UDP. Fixes: 44d28be2b8d4 ("selftests/bpf: Tests for sockmap/sockhash holding listening sockets") Reported-by: Andrii Nakryiko Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200313161049.677700-1-jakub@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 77 ++++++++++++++++------ 1 file changed, 58 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 52aa468bdccd..d7d65a700799 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include "test_progs.h" #include "test_sockmap_listen.skel.h" +#define IO_TIMEOUT_SEC 30 #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 @@ -44,9 +46,10 @@ /* Wrappers that fail the test on error and report it. */ -#define xaccept(fd, addr, len) \ +#define xaccept_nonblock(fd, addr, len) \ ({ \ - int __ret = accept((fd), (addr), (len)); \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ if (__ret == -1) \ FAIL_ERRNO("accept"); \ __ret; \ @@ -116,9 +119,10 @@ __ret; \ }) -#define xrecv(fd, buf, len, flags) \ +#define xrecv_nonblock(fd, buf, len, flags) \ ({ \ - ssize_t __ret = recv((fd), (buf), (len), (flags)); \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ if (__ret == -1) \ FAIL_ERRNO("recv"); \ __ret; \ @@ -191,6 +195,40 @@ __ret; \ }) +static int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) { struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); @@ -265,7 +303,7 @@ static int socket_loopback_reuseport(int family, int sotype, int progfd) if (err) goto close; - if (sotype == SOCK_DGRAM) + if (sotype & SOCK_DGRAM) return s; err = xlisten(s, SOMAXCONN); @@ -589,7 +627,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = socket_loopback(family, sotype); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s == -1) return; @@ -617,7 +655,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd) if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p == -1) goto close_cli; @@ -643,7 +681,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd) socklen_t len; u64 value; - s = socket_loopback(family, sotype); + s = socket_loopback(family, sotype | SOCK_NONBLOCK); if (s == -1) return; @@ -666,7 +704,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd) if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p == -1) goto close_cli; @@ -730,7 +768,7 @@ static void *connect_accept_thread(void *arg) break; } - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p < 0) { xclose(c); break; @@ -912,7 +950,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli0; - p0 = xaccept(s, NULL, NULL); + p0 = xaccept_nonblock(s, NULL, NULL); if (p0 < 0) goto close_cli0; @@ -923,7 +961,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli1; - p1 = xaccept(s, NULL, NULL); + p1 = xaccept_nonblock(s, NULL, NULL); if (p1 < 0) goto close_cli1; @@ -1044,7 +1082,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, if (err) goto close_cli; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p < 0) goto close_cli; @@ -1139,7 +1177,8 @@ static void test_reuseport_select_listening(int family, int sotype, zero_verdict_count(verd_map); - s = socket_loopback_reuseport(family, sotype, reuseport_prog); + s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, + reuseport_prog); if (s < 0) return; @@ -1164,7 +1203,7 @@ static void test_reuseport_select_listening(int family, int sotype, if (sotype == SOCK_STREAM) { int p; - p = xaccept(s, NULL, NULL); + p = xaccept_nonblock(s, NULL, NULL); if (p < 0) goto close_cli; xclose(p); @@ -1176,7 +1215,7 @@ static void test_reuseport_select_listening(int family, int sotype, if (n == -1) goto close_cli; - n = xrecv(s, &b, sizeof(b), 0); + n = xrecv_nonblock(s, &b, sizeof(b), 0); if (n == -1) goto close_cli; } @@ -1232,7 +1271,7 @@ static void test_reuseport_select_connected(int family, int sotype, goto close_cli0; if (sotype == SOCK_STREAM) { - p0 = xaccept(s, NULL, NULL); + p0 = xaccept_nonblock(s, NULL, NULL); if (p0 < 0) goto close_cli0; } else { @@ -1276,7 +1315,7 @@ static void test_reuseport_select_connected(int family, int sotype, if (n == -1) goto close_cli1; - n = recv(c1, &b, sizeof(b), 0); + n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); err = n == -1; } if (!err || errno != ECONNREFUSED) @@ -1350,7 +1389,7 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, if (n == -1) goto close_cli; - n = recv(c, &b, sizeof(b), 0); + n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); err = n == -1; } if (!err || errno != ECONNREFUSED) { -- cgit v1.2.3 From 3e2671fb9a95d2b46990832466383ec8384d88a3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 10:23:33 -0700 Subject: selftests/bpf: Ensure consistent test failure output printf() doesn't seem to honor using overwritten stdout/stderr (as part of stdio hijacking), so ensure all "standard" invocations of printf() do fprintf(stdout, ...) instead. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313172336.1879637-2-andriin@fb.com --- tools/testing/selftests/bpf/test_progs.c | 10 +++++----- tools/testing/selftests/bpf/test_progs.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index b6201dd82edf..f85a06512541 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -216,7 +216,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) map = bpf_object__find_map_by_name(obj, name); if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); + fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name); test__fail(); return -1; } @@ -387,7 +387,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, { if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) return 0; - vprintf(format, args); + vfprintf(stdout, format, args); return 0; } @@ -633,7 +633,7 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - printf("Switching to flavor '%s' subdirectory...\n", flavor); + fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); return chdir(flavor); } @@ -716,8 +716,8 @@ int main(int argc, char **argv) cleanup_cgroup_environment(); } stdio_restore(); - printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); free(env.test_selector.blacklist.strs); free(env.test_selector.whitelist.strs); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index bcfa9ef23fda..fd85fa61dbf7 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -109,10 +109,10 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%s ", __func__, tag); \ - printf(format); \ + fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \ + fprintf(stdout, ##format); \ } else { \ - printf("%s:PASS:%s %d nsec\n", \ + fprintf(stdout, "%s:PASS:%s %d nsec\n", \ __func__, tag, duration); \ } \ errno = __save_errno; \ @@ -124,7 +124,7 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%d\n", __func__, __LINE__); \ + fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \ } \ errno = __save_errno; \ __ret; \ -- cgit v1.2.3 From d121e1d34b72c4975ff0340901d926c0aaf98174 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 10:23:34 -0700 Subject: libbpf: Ignore incompatible types with matching name during CO-RE relocation When finding target type candidates, ignore forward declarations, functions, and other named types of incompatible kind. Not doing this can cause false errors. See [0] for one such case (due to struct pt_regs forward declaration). [0] https://github.com/iovisor/bcc/pull/2806#issuecomment-598543645 Fixes: ddc7c3042614 ("libbpf: implement BPF CO-RE offset relocation algorithm") Reported-by: Wenbo Zhang Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313172336.1879637-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1a787a2faf58..085e41f9b68e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3873,6 +3873,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (str_is_empty(targ_name)) continue; + t = skip_mods_and_typedefs(targ_btf, i, NULL); + if (!btf_is_composite(t) && !btf_is_array(t)) + continue; + targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; -- cgit v1.2.3 From b8ebce86ffe655ac15a841bba2d645105ffe3d38 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 10:23:35 -0700 Subject: libbpf: Provide CO-RE variants of PT_REGS macros Syscall raw tracepoints have struct pt_regs pointer as tracepoint's first argument. After that, reading any of pt_regs fields requires bpf_probe_read(), even for tp_btf programs. Due to that, PT_REGS_PARMx macros are not usable as is. This patch adds CO-RE variants of those macros that use BPF_CORE_READ() to read necessary fields. This provides relocatable architecture-agnostic pt_regs field accesses. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313172336.1879637-4-andriin@fb.com --- tools/lib/bpf/bpf_tracing.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 379d03b211ea..b0c9ae5c73b5 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -50,6 +50,7 @@ #if defined(bpf_target_x86) #if defined(__KERNEL__) || defined(__VMLINUX_H__) + #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) @@ -60,7 +61,20 @@ #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) + #else + #ifdef __i386__ /* i386 kernel is built with -mregparm=3 */ #define PT_REGS_PARM1(x) ((x)->eax) @@ -73,7 +87,20 @@ #define PT_REGS_RC(x) ((x)->eax) #define PT_REGS_SP(x) ((x)->esp) #define PT_REGS_IP(x) ((x)->eip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) +#define PT_REGS_PARM4_CORE(x) 0 +#define PT_REGS_PARM5_CORE(x) 0 +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) + #else + #define PT_REGS_PARM1(x) ((x)->rdi) #define PT_REGS_PARM2(x) ((x)->rsi) #define PT_REGS_PARM3(x) ((x)->rdx) @@ -84,6 +111,18 @@ #define PT_REGS_RC(x) ((x)->rax) #define PT_REGS_SP(x) ((x)->rsp) #define PT_REGS_IP(x) ((x)->rip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) + #endif #endif @@ -104,6 +143,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr) + #elif defined(bpf_target_arm) #define PT_REGS_PARM1(x) ((x)->uregs[0]) @@ -117,6 +167,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->uregs[13]) #define PT_REGS_IP(x) ((x)->uregs[12]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) + #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ @@ -134,6 +195,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) + #elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) @@ -147,6 +219,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) + #elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) @@ -158,6 +241,15 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) + #elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) @@ -169,11 +261,22 @@ struct pt_regs; #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) + /* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) #else #define PT_REGS_IP(x) ((x)->pc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif #endif -- cgit v1.2.3 From acbd06206bbbe59ffd2415c0b902dd244910e42e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 10:23:36 -0700 Subject: selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls Add vmlinux.h generation to selftest/bpf's Makefile. Use it from newly added test_vmlinux to trace nanosleep syscall using 5 different types of programs: - tracepoint; - raw tracepoint; - raw tracepoint w/ direct memory reads (tp_btf); - kprobe; - fentry. These programs are realistic variants of real-life tracing programs, excercising vmlinux.h's usage with tracing applications. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313172336.1879637-5-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 7 +- tools/testing/selftests/bpf/prog_tests/vmlinux.c | 43 ++++++++++++ tools/testing/selftests/bpf/progs/test_vmlinux.c | 84 ++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/vmlinux.c create mode 100644 tools/testing/selftests/bpf/progs/test_vmlinux.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 074a05efd1ca..7729892e0b04 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -136,7 +136,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF:= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -177,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): $(call msg,MKDIR,,$@) mkdir -p $@ +$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) + $(call msg,GEN,,$@) + $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. @@ -285,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(INCLUDE_DIR)/vmlinux.h \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c new file mode 100644 index 000000000000..04939eda1325 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include "test_vmlinux.skel.h" + +#define MY_TV_NSEC 1337 + +static void nsleep() +{ + struct timespec ts = { .tv_nsec = MY_TV_NSEC }; + + (void)nanosleep(&ts, NULL); +} + +void test_vmlinux(void) +{ + int duration = 0, err; + struct test_vmlinux* skel; + struct test_vmlinux__bss *bss; + + skel = test_vmlinux__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + err = test_vmlinux__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger everything */ + nsleep(); + + CHECK(!bss->tp_called, "tp", "not called\n"); + CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); + CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); + CHECK(!bss->kprobe_called, "kprobe", "not called\n"); + CHECK(!bss->fentry_called, "fentry", "not called\n"); + +cleanup: + test_vmlinux__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c new file mode 100644 index 000000000000..5611b564d3b1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include +#include +#include + +#define MY_TV_NSEC 1337 + +bool tp_called = false; +bool raw_tp_called = false; +bool tp_btf_called = false; +bool kprobe_called = false; +bool fentry_called = false; + +SEC("tp/syscalls/sys_enter_nanosleep") +int handle__tp(struct trace_event_raw_sys_enter *args) +{ + struct __kernel_timespec *ts; + + if (args->id != __NR_nanosleep) + return 0; + + ts = (void *)args->args[0]; + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_called = true; + return 0; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + raw_tp_called = true; + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_btf_called = true; + return 0; +} + +SEC("kprobe/hrtimer_nanosleep") +int BPF_KPROBE(handle__kprobe, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + kprobe_called = true; + return 0; +} + +SEC("fentry/hrtimer_nanosleep") +int BPF_PROG(handle__fentry, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + fentry_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 10ef49bdcc793ab3e9c2d1ade93a4ff9c82ddf99 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:55 +0200 Subject: selftests: qdiscs: Add TDC test for RED Add a handful of tests for creating RED with different flags. Signed-off-by: Petr Machata Reviewed-by: Roman Mashak Signed-off-by: David S. Miller --- .../selftests/tc-testing/tc-tests/qdiscs/red.json | 117 +++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json new file mode 100644 index 000000000000..b70a54464897 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json @@ -0,0 +1,117 @@ +[ + { + "id": "8b6e", + "name": "Create RED with no flags", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "342e", + "name": "Create RED with adaptive flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2d4b", + "name": "Create RED with ECN flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "650f", + "name": "Create RED with flags ECN, adaptive", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5f15", + "name": "Create RED with flags ECN, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + } +] -- cgit v1.2.3 From 058e56ac9ee63687f1633b2ed669edf4633815e7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:59 +0200 Subject: selftests: qdiscs: RED: Add nodrop tests Add tests for the new "nodrop" flag. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/tc-testing/tc-tests/qdiscs/red.json | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json index b70a54464897..0703a2a255eb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json @@ -113,5 +113,73 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP link del dev $DUMMY type dummy" ] + }, + { + "id": "53e8", + "name": "Create RED with flags ECN, nodrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "d091", + "name": "Fail to create RED with only nodrop flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "af8e", + "name": "Create RED with flags ECN, nodrop, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] } ] -- cgit v1.2.3 From 63f3c1d06f2597de5d68715a53f00900233ed0bd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:11:00 +0200 Subject: selftests: mlxsw: RED: Test RED ECN nodrop offload Extend RED testsuite to cover the new nodrop mode of RED-ECN. This test is really similar to ECN test, diverging only in the last step, where UDP traffic should go to backlog instead of being dropped. Thus extract a common helper, ecn_test_common(), make do_ecn_test() into a relatively simple wrapper, and add another one, do_ecn_nodrop_test(). Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/sch_red_core.sh | 50 ++++++++++++++++++---- .../selftests/drivers/net/mlxsw/sch_red_ets.sh | 11 +++++ .../selftests/drivers/net/mlxsw/sch_red_root.sh | 8 ++++ 3 files changed, 61 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 8f833678ac4d..0d347d48c112 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -389,17 +389,14 @@ check_marking() ((pct $cond)) } -do_ecn_test() +ecn_test_common() { + local name=$1; shift local vlan=$1; shift local limit=$1; shift local backlog local pct - # Main stream. - start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ - $h3_mac tos=0x01 - # Build the below-the-limit backlog using UDP. We could use TCP just # fine, but this way we get a proof that UDP is accepted when queue # length is below the limit. The main stream is using TCP, and if the @@ -409,7 +406,7 @@ do_ecn_test() check_err $? "Could not build the requested backlog" pct=$(check_marking $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." - log_test "TC $((vlan - 10)): ECN backlog < limit" + log_test "TC $((vlan - 10)): $name backlog < limit" # Now push TCP, because non-TCP traffic would be early-dropped after the # backlog crosses the limit, and we want to make sure that the backlog @@ -419,7 +416,20 @@ do_ecn_test() check_err $? "Could not build the requested backlog" pct=$(check_marking $vlan ">= 95") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." - log_test "TC $((vlan - 10)): ECN backlog > limit" + log_test "TC $((vlan - 10)): $name backlog > limit" +} + +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name=ECN + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, it should all get dropped, and backlog @@ -427,7 +437,31 @@ do_ecn_test() RET=0 build_backlog $vlan $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" - log_test "TC $((vlan - 10)): ECN backlog > limit: UDP early-dropped" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name="ECN nodrop" + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" stop_traffic sleep 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index af83efe9ccf1..1c36c576613b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_nodrop_test red_test mc_backlog_test " @@ -50,6 +51,16 @@ ecn_test() uninstall_qdisc } +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + + do_ecn_nodrop_test 10 $BACKLOG1 + do_ecn_nodrop_test 11 $BACKLOG2 + + uninstall_qdisc +} + red_test() { install_qdisc diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index b2217493a88e..558667ea11ec 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_nodrop_test red_test mc_backlog_test " @@ -33,6 +34,13 @@ ecn_test() uninstall_qdisc } +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test 10 $BACKLOG + uninstall_qdisc +} + red_test() { install_qdisc -- cgit v1.2.3 From 4f72180eb4da9ce0bad2f284e81875bb15ecfbb7 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 20 Feb 2020 18:09:12 +0100 Subject: KVM: selftests: Add demand paging content to the demand paging test The demand paging test is currently a simple page access test which, while potentially useful, doesn't add much versus the existing dirty logging test. To improve the demand paging test, add a basic userfaultfd demand paging implementation. Signed-off-by: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/asm/unistd_64.h | 3 + tools/testing/selftests/kvm/demand_paging_test.c | 210 ++++++++++++++++++++++- 2 files changed, 209 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h index cb52a3a8b8fc..4205ed4158bf 100644 --- a/tools/arch/x86/include/asm/unistd_64.h +++ b/tools/arch/x86/include/asm/unistd_64.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_userfaultfd +#define __NR_userfaultfd 282 +#endif #ifndef __NR_perf_event_open # define __NR_perf_event_open 298 #endif diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index e3d49172e2c3..6be9793789f1 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -11,16 +11,21 @@ #include #include +#include #include +#include #include +#include #include #include #include +#include #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#ifdef __NR_userfaultfd #define VCPU_ID 1 /* The memory slot index demand page */ @@ -39,6 +44,8 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; +static char *guest_data_prototype; + /* * Guest physical memory offset of the testing memory slot. * This will be set to the topmost valid physical address minus @@ -110,13 +117,169 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +static int handle_uffd_page_request(int uffd, uint64_t addr) +{ + pid_t tid; + struct uffdio_copy copy; + int r; + + tid = syscall(__NR_gettid); + + copy.src = (uint64_t)guest_data_prototype; + copy.dst = addr; + copy.len = host_page_size; + copy.mode = 0; + + r = ioctl(uffd, UFFDIO_COPY, ©); + if (r == -1) { + DEBUG("Failed Paged in 0x%lx from thread %d with errno: %d\n", + addr, tid, errno); + return r; + } + + return 0; +} + +bool quit_uffd_thread; + +struct uffd_handler_args { + int uffd; + int pipefd; +}; + +static void *uffd_handler_thread_fn(void *arg) +{ + struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; + int uffd = uffd_args->uffd; + int pipefd = uffd_args->pipefd; + int64_t pages = 0; + + while (!quit_uffd_thread) { + struct uffd_msg msg; + struct pollfd pollfd[2]; + char tmp_chr; + int r; + uint64_t addr; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd; + pollfd[1].events = POLLIN; + + r = poll(pollfd, 2, -1); + switch (r) { + case -1: + DEBUG("poll err"); + continue; + case 0: + continue; + case 1: + break; + default: + DEBUG("Polling uffd returned %d", r); + return NULL; + } + + if (pollfd[0].revents & POLLERR) { + DEBUG("uffd revents has POLLERR"); + return NULL; + } + + if (pollfd[1].revents & POLLIN) { + r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(r == 1, + "Error reading pipefd in UFFD thread\n"); + return NULL; + } + + if (!pollfd[0].revents & POLLIN) + continue; + + r = read(uffd, &msg, sizeof(msg)); + if (r == -1) { + if (errno == EAGAIN) + continue; + DEBUG("Read of uffd gor errno %d", errno); + return NULL; + } + + if (r != sizeof(msg)) { + DEBUG("Read on uffd returned unexpected size: %d bytes", + r); + return NULL; + } + + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) + continue; + + addr = msg.arg.pagefault.address; + r = handle_uffd_page_request(uffd, addr); + if (r < 0) + return NULL; + pages++; + } + + return NULL; +} + +static int setup_demand_paging(struct kvm_vm *vm, + pthread_t *uffd_handler_thread, int pipefd) +{ + int uffd; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + struct uffd_handler_args uffd_args; + + guest_data_prototype = malloc(host_page_size); + TEST_ASSERT(guest_data_prototype, + "Failed to allocate buffer for guest data pattern"); + memset(guest_data_prototype, 0xAB, host_page_size); + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + DEBUG("uffd creation failed\n"); + return -1; + } + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + DEBUG("ioctl uffdio_api failed\n"); + return -1; + } + + uffdio_register.range.start = (uint64_t)host_test_mem; + uffdio_register.range.len = host_num_pages * host_page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + DEBUG("ioctl uffdio_register failed\n"); + return -1; + } + + if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != + UFFD_API_RANGE_IOCTLS) { + DEBUG("unexpected userfaultfd ioctl set\n"); + return -1; + } + + uffd_args.uffd = uffd; + uffd_args.pipefd = pipefd; + pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, + &uffd_args); + + return 0; +} + #define GUEST_MEM_SHIFT 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode) +static void run_test(enum vm_guest_mode mode, bool use_uffd) { pthread_t vcpu_thread; + pthread_t uffd_handler_thread; + int pipefd[2]; struct kvm_vm *vm; + int r; /* * We reserve page table for 2 times of extra dirty mem which @@ -173,6 +336,16 @@ static void run_test(enum vm_guest_mode mode) /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + if (use_uffd) { + /* Set up user fault fd to handle demand paging requests. */ + r = pipe2(pipefd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!r, "Failed to set up pipefd"); + + r = setup_demand_paging(vm, &uffd_handler_thread, pipefd[0]); + if (r < 0) + exit(-r); + } + #ifdef __x86_64__ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); #endif @@ -191,8 +364,20 @@ static void run_test(enum vm_guest_mode mode) /* Wait for the vcpu thread to quit */ pthread_join(vcpu_thread, NULL); + if (use_uffd) { + char c; + + /* Tell the user fault fd handler thread to quit */ + r = write(pipefd[1], &c, 1); + TEST_ASSERT(r == 1, "Unable to write to pipefd"); + + pthread_join(uffd_handler_thread, NULL); + } + ucall_uninit(vm); kvm_vm_free(vm); + + free(guest_data_prototype); } struct guest_mode { @@ -210,7 +395,7 @@ static void help(char *name) int i; puts(""); - printf("usage: %s [-h] [-m mode]\n", name); + printf("usage: %s [-h] [-m mode] [-u]\n", name); printf(" -m: specify the guest mode ID to test\n" " (default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -219,6 +404,7 @@ static void help(char *name) printf(" %d: %s%s\n", i, vm_guest_mode_string(i), guest_modes[i].supported ? " (supported)" : ""); } + printf(" -u: Use User Fault FD to handle vCPU page faults.\n"); puts(""); exit(0); } @@ -228,6 +414,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; unsigned int mode; int opt, i; + bool use_uffd = false; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -250,7 +437,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hm:")) != -1) { + while ((opt = getopt(argc, argv, "hm:u")) != -1) { switch (opt) { case 'm': if (!mode_selected) { @@ -263,6 +450,9 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'u': + use_uffd = true; + break; case 'h': default: help(argv[0]); @@ -276,8 +466,20 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i); + run_test(i, use_uffd); } return 0; } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ -- cgit v1.2.3 From 0119cb365c93621535187c7527486c3b378a622d Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 20 Feb 2020 18:09:59 +0100 Subject: KVM: selftests: Add configurable demand paging delay When running the demand paging test with the -u option, the User Fault FD handler essentially adds an arbitrary delay to page fault resolution. To enable better simulation of a real demand paging scenario, add a configurable delay to the UFFD handler. Reviewed-by: Peter Xu Signed-off-by: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 32 ++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 6be9793789f1..ab302e1f5230 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -145,6 +145,7 @@ bool quit_uffd_thread; struct uffd_handler_args { int uffd; int pipefd; + useconds_t delay; }; static void *uffd_handler_thread_fn(void *arg) @@ -152,6 +153,7 @@ static void *uffd_handler_thread_fn(void *arg) struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; int uffd = uffd_args->uffd; int pipefd = uffd_args->pipefd; + useconds_t delay = uffd_args->delay; int64_t pages = 0; while (!quit_uffd_thread) { @@ -212,6 +214,8 @@ static void *uffd_handler_thread_fn(void *arg) if (!(msg.event & UFFD_EVENT_PAGEFAULT)) continue; + if (delay) + usleep(delay); addr = msg.arg.pagefault.address; r = handle_uffd_page_request(uffd, addr); if (r < 0) @@ -223,7 +227,8 @@ static void *uffd_handler_thread_fn(void *arg) } static int setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd) + pthread_t *uffd_handler_thread, int pipefd, + useconds_t uffd_delay) { int uffd; struct uffdio_api uffdio_api; @@ -264,6 +269,7 @@ static int setup_demand_paging(struct kvm_vm *vm, uffd_args.uffd = uffd; uffd_args.pipefd = pipefd; + uffd_args.delay = uffd_delay; pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, &uffd_args); @@ -273,7 +279,8 @@ static int setup_demand_paging(struct kvm_vm *vm, #define GUEST_MEM_SHIFT 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, bool use_uffd) +static void run_test(enum vm_guest_mode mode, bool use_uffd, + useconds_t uffd_delay) { pthread_t vcpu_thread; pthread_t uffd_handler_thread; @@ -341,7 +348,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd) r = pipe2(pipefd, O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(!r, "Failed to set up pipefd"); - r = setup_demand_paging(vm, &uffd_handler_thread, pipefd[0]); + r = setup_demand_paging(vm, &uffd_handler_thread, pipefd[0], + uffd_delay); if (r < 0) exit(-r); } @@ -395,7 +403,7 @@ static void help(char *name) int i; puts(""); - printf("usage: %s [-h] [-m mode] [-u]\n", name); + printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n", name); printf(" -m: specify the guest mode ID to test\n" " (default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -404,7 +412,11 @@ static void help(char *name) printf(" %d: %s%s\n", i, vm_guest_mode_string(i), guest_modes[i].supported ? " (supported)" : ""); } - printf(" -u: Use User Fault FD to handle vCPU page faults.\n"); + printf(" -u: use User Fault FD to handle vCPU page\n" + " faults.\n"); + printf(" -d: add a delay in usec to the User Fault\n" + " FD handler to simulate demand paging\n" + " overheads. Ignored without -u.\n"); puts(""); exit(0); } @@ -415,6 +427,7 @@ int main(int argc, char *argv[]) unsigned int mode; int opt, i; bool use_uffd = false; + useconds_t uffd_delay = 0; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -437,7 +450,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hm:u")) != -1) { + while ((opt = getopt(argc, argv, "hm:ud:")) != -1) { switch (opt) { case 'm': if (!mode_selected) { @@ -453,6 +466,11 @@ int main(int argc, char *argv[]) case 'u': use_uffd = true; break; + case 'd': + uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(uffd_delay >= 0, + "A negative UFFD delay is not supported."); + break; case 'h': default: help(argv[0]); @@ -466,7 +484,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd); + run_test(i, use_uffd, uffd_delay); } return 0; -- cgit v1.2.3 From af99e1ad7e708d1a1a4e4c1bb10a2b851974fc04 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:30 -0800 Subject: KVM: selftests: Add memory size parameter to the demand paging test Add an argument to allow the demand paging test to work on larger and smaller guest sizes. Signed-off-by: Ben Gardon [Rewrote parse_size() to simplify and provide user more flexibility as to how sizes are input. Also fixed size overflow assert.] Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 57 +++++++++++++++--------- tools/testing/selftests/kvm/include/test_util.h | 2 + tools/testing/selftests/kvm/lib/test_util.c | 51 +++++++++++++++++++++ 4 files changed, 90 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/test_util.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 1bc9f41d3fcd..1bda24d30b3a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -7,7 +7,7 @@ top_srcdir = ../../../.. KSFT_KHDR_INSTALL := 1 UNAME_M := $(shell uname -m) -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index ab302e1f5230..c1880b3e3041 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -34,6 +34,8 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 +#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -276,11 +278,10 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -#define GUEST_MEM_SHIFT 30 /* 1G */ #define PAGE_SHIFT_4K 12 static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) + useconds_t uffd_delay, uint64_t guest_memory_bytes) { pthread_t vcpu_thread; pthread_t uffd_handler_thread; @@ -289,33 +290,40 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int r; /* - * We reserve page table for 2 times of extra dirty mem which - * will definitely cover the original (1G+) test range. Here - * we do the calculation with 4K page size which is the - * smallest so the page number will be enough for all archs - * (e.g., 64K page size guest will need even less memory for - * page tables). + * We reserve page table for twice the ammount of memory we intend + * to use in the test region for demand paging. Here we do the + * calculation with 4K page size which is the smallest so the page + * number will be enough for all archs. (e.g., 64K page size guest + * will need even less memory for page tables). */ vm = create_vm(mode, VCPU_ID, - 2ul << (GUEST_MEM_SHIFT - PAGE_SHIFT_4K), + (2 * guest_memory_bytes) >> PAGE_SHIFT_4K, guest_code); guest_page_size = vm_get_page_size(vm); - /* - * A little more than 1G of guest page sized pages. Cover the - * case where the size is not aligned to 64 pages. - */ - guest_num_pages = (1ul << (GUEST_MEM_SHIFT - - vm_get_page_shift(vm))) + 16; + + TEST_ASSERT(guest_memory_bytes % guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = guest_memory_bytes / guest_page_size; + #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; #endif + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %lx\n", + guest_num_pages, vm_get_max_gfn(vm)); host_page_size = getpagesize(); - host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + - !!((guest_num_pages * guest_page_size) % - host_page_size); + TEST_ASSERT(guest_memory_bytes % host_page_size == 0, + "Guest memory size is not host page size aligned."); + host_num_pages = guest_memory_bytes / host_page_size; guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; @@ -403,7 +411,8 @@ static void help(char *name) int i; puts(""); - printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n", name); + printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" + " [-b memory]\n", name); printf(" -m: specify the guest mode ID to test\n" " (default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -417,6 +426,8 @@ static void help(char *name) printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); + printf(" -b: specify the size of the memory region which should be\n" + " demand paged. e.g. 10M or 3G. Default: 1G\n"); puts(""); exit(0); } @@ -424,6 +435,7 @@ static void help(char *name) int main(int argc, char *argv[]) { bool mode_selected = false; + uint64_t guest_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; unsigned int mode; int opt, i; bool use_uffd = false; @@ -450,7 +462,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hm:ud:")) != -1) { + while ((opt = getopt(argc, argv, "hm:ud:b:")) != -1) { switch (opt) { case 'm': if (!mode_selected) { @@ -471,6 +483,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(uffd_delay >= 0, "A negative UFFD delay is not supported."); break; + case 'b': + guest_memory_bytes = parse_size(optarg); + break; case 'h': default: help(argv[0]); @@ -484,7 +499,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); + run_test(i, use_uffd, uffd_delay, guest_memory_bytes); } return 0; diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index a41db6fb7e24..e696c8219d69 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -39,4 +39,6 @@ void test_assert(bool exp, const char *exp_str, #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ } while (0) +size_t parse_size(const char *size); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c new file mode 100644 index 000000000000..cbd7f51b07a1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/lib/test_util.c + * + * Copyright (C) 2020, Google LLC. + */ +#include +#include +#include +#include "test_util.h" + +/* + * Parses "[0-9]+[kmgt]?". + */ +size_t parse_size(const char *size) +{ + size_t base; + char *scale; + int shift = 0; + + TEST_ASSERT(size && isdigit(size[0]), "Need at least one digit in '%s'", size); + + base = strtoull(size, &scale, 0); + + TEST_ASSERT(base != ULLONG_MAX, "Overflow parsing size!"); + + switch (tolower(*scale)) { + case 't': + shift = 40; + break; + case 'g': + shift = 30; + break; + case 'm': + shift = 20; + break; + case 'k': + shift = 10; + break; + case 'b': + case '\0': + shift = 0; + break; + default: + TEST_ASSERT(false, "Unknown size letter %c", *scale); + } + + TEST_ASSERT((base << shift) >> shift == base, "Overflow scaling size!"); + + return base << shift; +} -- cgit v1.2.3 From 56a4210f4e4ed9c8ebec87d212453be8f6f8750f Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:31 -0800 Subject: KVM: selftests: Pass args to vCPU in global vCPU args struct In preparation for supporting multiple vCPUs in the demand paging test, pass arguments to the vCPU in a consolidated global struct instead of syncing multiple globals. Signed-off-by: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 38 +++++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index c1880b3e3041..aa39d065b3f2 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -44,7 +44,6 @@ */ static uint64_t host_page_size; static uint64_t guest_page_size; -static uint64_t guest_num_pages; static char *guest_data_prototype; @@ -61,18 +60,30 @@ static uint64_t guest_test_phys_mem; */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; + struct kvm_vm *vm; +}; + +static struct vcpu_args vcpu_args; + /* * Continuously write to the first 8 bytes of each page in the demand paging * memory region. */ static void guest_code(void) { + uint64_t gva = vcpu_args.gva; + uint64_t pages = vcpu_args.pages; int i; - for (i = 0; i < guest_num_pages; i++) { - uint64_t addr = guest_test_virt_mem; + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * guest_page_size); - addr += i * guest_page_size; addr &= ~(host_page_size - 1); *(uint64_t *)addr = 0x0123456789ABCDEF; } @@ -87,15 +98,16 @@ static uint64_t host_num_pages; static void *vcpu_worker(void *data) { int ret; - struct kvm_vm *vm = data; + struct kvm_vm *vm = vcpu_args.vm; + int vcpu_id = vcpu_args.vcpu_id; struct kvm_run *run; - run = vcpu_state(vm, VCPU_ID); + run = vcpu_state(vm, vcpu_id); /* Let the guest access its memory */ - ret = _vcpu_run(vm, VCPU_ID); + ret = _vcpu_run(vm, vcpu_id); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - if (get_ucall(vm, VCPU_ID, NULL) != UCALL_SYNC) { + if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { TEST_ASSERT(false, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); @@ -287,6 +299,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pthread_t uffd_handler_thread; int pipefd[2]; struct kvm_vm *vm; + uint64_t guest_num_pages; int r; /* @@ -372,10 +385,13 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, guest_test_virt_mem); - sync_global_to_guest(vm, guest_num_pages); - pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + vcpu_args.vm = vm; + vcpu_args.vcpu_id = VCPU_ID; + vcpu_args.gva = guest_test_virt_mem; + vcpu_args.pages = guest_num_pages; + sync_global_to_guest(vm, vcpu_args); + pthread_create(&vcpu_thread, NULL, vcpu_worker, &vcpu_args); /* Wait for the vcpu thread to quit */ pthread_join(vcpu_thread, NULL); -- cgit v1.2.3 From 9bbf24744e12bb6b9b32e30917b07f9242cc0341 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:32 -0800 Subject: KVM: selftests: Add support for vcpu_args_set to aarch64 and s390x Currently vcpu_args_set is only implemented for x86. This makes writing tests with multiple vCPUs difficult as each guest vCPU must either a.) do the same thing or b.) derive some kind of unique token from it's registers or the architecture. To simplify the process of writing tests with multiple vCPUs for s390 and aarch64, add set args functions for those architectures. Signed-off-by: Ben Gardon [Fixed array index (num => i) and made some style changes.] Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/lib/aarch64/processor.c | 35 +++++++++++++++++++++ tools/testing/selftests/kvm/lib/s390x/processor.c | 36 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index f9decadfbe71..216d802479dd 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -333,3 +333,38 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code); } + + +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. The registers set by this function are r0-r7. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + int i; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + " num: %u\n", num); + + va_start(ap, num); + + for (i = 0; i < num; i++) { + set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]), + va_arg(ap, uint64_t)); + } + + va_end(ap); +} diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 32a02360b1eb..a0b84235c848 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -269,6 +269,42 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) run->psw_addr = (uintptr_t)guest_code; } +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. The registers set by this function are r2-r6. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + int i; + + TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + for (i = 0; i < num; i++) + regs.gprs[i + 2] = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) { struct vcpu *vcpu = vm->vcpu_head; -- cgit v1.2.3 From 018494e6d8234c420e4f7236f502993df5584812 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:33 -0800 Subject: KVM: selftests: Support multiple vCPUs in demand paging test Most VMs have multiple vCPUs, the concurrent execution of which has a substantial impact on demand paging performance. Add an option to create multiple vCPUs to each access disjoint regions of memory. Signed-off-by: Ben Gardon [guest_code() can't return, use GUEST_ASSERT(). Ensure the number of guests pages is compatible with the host.] Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 253 +++++++++++++++-------- 1 file changed, 171 insertions(+), 82 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index aa39d065b3f2..c516cece2368 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -26,7 +26,6 @@ #include "processor.h" #ifdef __NR_userfaultfd -#define VCPU_ID 1 /* The memory slot index demand page */ #define TEST_MEM_SLOT_INDEX 1 @@ -36,6 +35,14 @@ #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ +#ifdef PRINT_PER_VCPU_UPDATES +#define PER_VCPU_DEBUG(...) DEBUG(__VA_ARGS__) +#else +#define PER_VCPU_DEBUG(...) +#endif + +#define MAX_VCPUS 512 + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -69,18 +76,24 @@ struct vcpu_args { struct kvm_vm *vm; }; -static struct vcpu_args vcpu_args; +static struct vcpu_args vcpu_args[MAX_VCPUS]; /* * Continuously write to the first 8 bytes of each page in the demand paging * memory region. */ -static void guest_code(void) +static void guest_code(uint32_t vcpu_id) { - uint64_t gva = vcpu_args.gva; - uint64_t pages = vcpu_args.pages; + uint64_t gva; + uint64_t pages; int i; + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); + + gva = vcpu_args[vcpu_id].gva; + pages = vcpu_args[vcpu_id].pages; + for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * guest_page_size); @@ -91,17 +104,15 @@ static void guest_code(void) GUEST_SYNC(1); } -/* Points to the test VM memory region on which we are doing demand paging */ -static void *host_test_mem; -static uint64_t host_num_pages; - static void *vcpu_worker(void *data) { int ret; - struct kvm_vm *vm = vcpu_args.vm; - int vcpu_id = vcpu_args.vcpu_id; + struct vcpu_args *args = (struct vcpu_args *)data; + struct kvm_vm *vm = args->vm; + int vcpu_id = args->vcpu_id; struct kvm_run *run; + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); /* Let the guest access its memory */ @@ -116,18 +127,34 @@ static void *vcpu_worker(void *data) return NULL; } -static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code) +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) { struct kvm_vm *vm; - uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = _vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); #endif - vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -242,17 +269,13 @@ static void *uffd_handler_thread_fn(void *arg) static int setup_demand_paging(struct kvm_vm *vm, pthread_t *uffd_handler_thread, int pipefd, - useconds_t uffd_delay) + useconds_t uffd_delay, + struct uffd_handler_args *uffd_args, + void *hva, uint64_t len) { int uffd; struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; - struct uffd_handler_args uffd_args; - - guest_data_prototype = malloc(host_page_size); - TEST_ASSERT(guest_data_prototype, - "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1) { @@ -267,8 +290,8 @@ static int setup_demand_paging(struct kvm_vm *vm, return -1; } - uffdio_register.range.start = (uint64_t)host_test_mem; - uffdio_register.range.len = host_num_pages * host_page_size; + uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.len = len; uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { DEBUG("ioctl uffdio_register failed\n"); @@ -281,44 +304,40 @@ static int setup_demand_paging(struct kvm_vm *vm, return -1; } - uffd_args.uffd = uffd; - uffd_args.pipefd = pipefd; - uffd_args.delay = uffd_delay; + uffd_args->uffd = uffd; + uffd_args->pipefd = pipefd; + uffd_args->delay = uffd_delay; pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, - &uffd_args); + uffd_args); + + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", + hva, hva + len); return 0; } -#define PAGE_SHIFT_4K 12 - static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, uint64_t guest_memory_bytes) + useconds_t uffd_delay, int vcpus, + uint64_t vcpu_memory_bytes) { - pthread_t vcpu_thread; - pthread_t uffd_handler_thread; - int pipefd[2]; + pthread_t *vcpu_threads; + pthread_t *uffd_handler_threads = NULL; + struct uffd_handler_args *uffd_args = NULL; + int *pipefds = NULL; struct kvm_vm *vm; uint64_t guest_num_pages; + int vcpu_id; int r; - /* - * We reserve page table for twice the ammount of memory we intend - * to use in the test region for demand paging. Here we do the - * calculation with 4K page size which is the smallest so the page - * number will be enough for all archs. (e.g., 64K page size guest - * will need even less memory for page tables). - */ - vm = create_vm(mode, VCPU_ID, - (2 * guest_memory_bytes) >> PAGE_SHIFT_4K, - guest_code); + vm = create_vm(mode, vcpus, vcpu_memory_bytes); guest_page_size = vm_get_page_size(vm); - TEST_ASSERT(guest_memory_bytes % guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, "Guest memory size is not guest page size aligned."); - guest_num_pages = guest_memory_bytes / guest_page_size; + guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ @@ -330,13 +349,13 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, */ TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %lx\n", - guest_num_pages, vm_get_max_gfn(vm)); + " guest pages: %lx max gfn: %lx vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); host_page_size = getpagesize(); - TEST_ASSERT(guest_memory_bytes % host_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, "Guest memory size is not host page size aligned."); - host_num_pages = guest_memory_bytes / host_page_size; guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; @@ -361,55 +380,114 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages * guest_page_size, 0); - /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + ucall_init(vm, NULL); + + guest_data_prototype = malloc(host_page_size); + TEST_ASSERT(guest_data_prototype, + "Failed to allocate buffer for guest data pattern"); + memset(guest_data_prototype, 0xAB, host_page_size); + + vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); if (use_uffd) { - /* Set up user fault fd to handle demand paging requests. */ - r = pipe2(pipefd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!r, "Failed to set up pipefd"); + uffd_handler_threads = + malloc(vcpus * sizeof(*uffd_handler_threads)); + TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - r = setup_demand_paging(vm, &uffd_handler_thread, pipefd[0], - uffd_delay); - if (r < 0) - exit(-r); + uffd_args = malloc(vcpus * sizeof(*uffd_args)); + TEST_ASSERT(uffd_args, "Memory allocation failed"); + + pipefds = malloc(sizeof(int) * vcpus * 2); + TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); } + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + + if (use_uffd) { + /* + * Set up user fault fd to handle demand paging + * requests. + */ + r = pipe2(&pipefds[vcpu_id * 2], + O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!r, "Failed to set up pipefd"); + + r = setup_demand_paging(vm, + &uffd_handler_threads[vcpu_id], + pipefds[vcpu_id * 2], + uffd_delay, &uffd_args[vcpu_id], + vcpu_hva, vcpu_memory_bytes); + if (r < 0) + exit(-r); + } + #ifdef __x86_64__ - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); -#endif -#ifdef __aarch64__ - ucall_init(vm, NULL); + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); #endif + vcpu_args[vcpu_id].vm = vm; + vcpu_args[vcpu_id].vcpu_id = vcpu_id; + vcpu_args[vcpu_id].gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; + } + /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); - - vcpu_args.vm = vm; - vcpu_args.vcpu_id = VCPU_ID; - vcpu_args.gva = guest_test_virt_mem; - vcpu_args.pages = guest_num_pages; sync_global_to_guest(vm, vcpu_args); - pthread_create(&vcpu_thread, NULL, vcpu_worker, &vcpu_args); - /* Wait for the vcpu thread to quit */ - pthread_join(vcpu_thread, NULL); + DEBUG("Finished creating vCPUs and starting uffd threads\n"); + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &vcpu_args[vcpu_id]); + } + + DEBUG("Started all vCPUs\n"); + + /* Wait for the vcpu threads to quit */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + pthread_join(vcpu_threads[vcpu_id], NULL); + PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); + } + + DEBUG("All vCPU threads joined\n"); if (use_uffd) { char c; - /* Tell the user fault fd handler thread to quit */ - r = write(pipefd[1], &c, 1); - TEST_ASSERT(r == 1, "Unable to write to pipefd"); + /* Tell the user fault fd handler threads to quit */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + r = write(pipefds[vcpu_id * 2 + 1], &c, 1); + TEST_ASSERT(r == 1, "Unable to write to pipefd"); - pthread_join(uffd_handler_thread, NULL); + pthread_join(uffd_handler_threads[vcpu_id], NULL); + } } ucall_uninit(vm); kvm_vm_free(vm); free(guest_data_prototype); + free(vcpu_threads); + if (use_uffd) { + free(uffd_handler_threads); + free(uffd_args); + free(pipefds); + } } struct guest_mode { @@ -428,7 +506,7 @@ static void help(char *name) puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" - " [-b memory]\n", name); + " [-b memory] [-v vcpus]\n", name); printf(" -m: specify the guest mode ID to test\n" " (default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -443,7 +521,9 @@ static void help(char *name) " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); printf(" -b: specify the size of the memory region which should be\n" - " demand paged. e.g. 10M or 3G. Default: 1G\n"); + " demand paged by each vCPU. e.g. 10M or 3G.\n" + " Default: 1G\n"); + printf(" -v: specify the number of vCPUs to run.\n"); puts(""); exit(0); } @@ -451,7 +531,8 @@ static void help(char *name) int main(int argc, char *argv[]) { bool mode_selected = false; - uint64_t guest_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; + uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; + int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -478,7 +559,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hm:ud:b:")) != -1) { + while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { switch (opt) { case 'm': if (!mode_selected) { @@ -500,7 +581,15 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - guest_memory_bytes = parse_size(optarg); + vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + vcpus = atoi(optarg); + TEST_ASSERT(vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); break; case 'h': default: @@ -515,7 +604,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, guest_memory_bytes); + run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); } return 0; -- cgit v1.2.3 From f09205b99832f353088b7c82778b3f8175627620 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 23 Jan 2020 10:04:34 -0800 Subject: KVM: selftests: Time guest demand paging In order to quantify demand paging performance, time guest execution during demand paging. Signed-off-by: Ben Gardon [Move timespec-diff to test_util.h] Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 50 +++++++++++++++++++++++- tools/testing/selftests/kvm/include/test_util.h | 3 ++ tools/testing/selftests/kvm/lib/test_util.c | 20 ++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index c516cece2368..8cdb8871e4d8 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -35,6 +35,12 @@ #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ +#ifdef PRINT_PER_PAGE_UPDATES +#define PER_PAGE_DEBUG(...) DEBUG(__VA_ARGS__) +#else +#define PER_PAGE_DEBUG(...) +#endif + #ifdef PRINT_PER_VCPU_UPDATES #define PER_VCPU_DEBUG(...) DEBUG(__VA_ARGS__) #else @@ -111,10 +117,14 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = args->vm; int vcpu_id = args->vcpu_id; struct kvm_run *run; + struct timespec start; + struct timespec end; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); + clock_gettime(CLOCK_MONOTONIC, &start); + /* Let the guest access its memory */ ret = _vcpu_run(vm, vcpu_id); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); @@ -124,6 +134,11 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } + clock_gettime(CLOCK_MONOTONIC, &end); + PER_VCPU_DEBUG("vCPU %d execution time: %lld.%.9lds\n", vcpu_id, + (long long)(timespec_diff(start, end).tv_sec), + timespec_diff(start, end).tv_nsec); + return NULL; } @@ -161,6 +176,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; + struct timespec start; + struct timespec end; struct uffdio_copy copy; int r; @@ -171,6 +188,8 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.len = host_page_size; copy.mode = 0; + clock_gettime(CLOCK_MONOTONIC, &start); + r = ioctl(uffd, UFFDIO_COPY, ©); if (r == -1) { DEBUG("Failed Paged in 0x%lx from thread %d with errno: %d\n", @@ -178,6 +197,13 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } + clock_gettime(CLOCK_MONOTONIC, &end); + + PER_PAGE_DEBUG("UFFDIO_COPY %d \t%lld ns\n", tid, + (long long)timespec_to_ns(timespec_diff(start, end))); + PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", + host_page_size, addr, tid); + return 0; } @@ -196,7 +222,10 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; + struct timespec start; + struct timespec end; + clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { struct uffd_msg msg; struct pollfd pollfd[2]; @@ -264,6 +293,13 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } + clock_gettime(CLOCK_MONOTONIC, &end); + PER_VCPU_DEBUG("userfaulted %ld pages over %lld.%.9lds. (%f/sec)\n", + pages, (long long)(timespec_diff(start, end).tv_sec), + timespec_diff(start, end).tv_nsec, pages / + ((double)timespec_diff(start, end).tv_sec + + (double)timespec_diff(start, end).tv_nsec / 100000000.0)); + return NULL; } @@ -328,6 +364,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, uint64_t guest_num_pages; int vcpu_id; int r; + struct timespec start; + struct timespec end; vm = create_vm(mode, vcpus, vcpu_memory_bytes); @@ -369,7 +407,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - /* Add an extra memory slot for testing demand paging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -451,6 +488,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, DEBUG("Finished creating vCPUs and starting uffd threads\n"); + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, &vcpu_args[vcpu_id]); @@ -466,6 +505,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, DEBUG("All vCPU threads joined\n"); + clock_gettime(CLOCK_MONOTONIC, &end); + if (use_uffd) { char c; @@ -478,6 +519,13 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } + DEBUG("Total guest execution time: %lld.%.9lds\n", + (long long)(timespec_diff(start, end).tv_sec), + timespec_diff(start, end).tv_nsec); + DEBUG("Overall demand paging rate: %f pgs/sec\n", + guest_num_pages / ((double)timespec_diff(start, end).tv_sec + + (double)timespec_diff(start, end).tv_nsec / 100000000.0)); + ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index e696c8219d69..920328ca5f7e 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -41,4 +41,7 @@ void test_assert(bool exp, const char *exp_str, size_t parse_size(const char *size); +int64_t timespec_to_ns(struct timespec ts); +struct timespec timespec_diff(struct timespec start, struct timespec end); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index cbd7f51b07a1..1c0d45afdf36 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -49,3 +49,23 @@ size_t parse_size(const char *size) return base << shift; } + +int64_t timespec_to_ns(struct timespec ts) +{ + return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec; +} + +struct timespec timespec_diff(struct timespec start, struct timespec end) +{ + struct timespec temp; + + if ((end.tv_nsec - start.tv_nsec) < 0) { + temp.tv_sec = end.tv_sec - start.tv_sec - 1; + temp.tv_nsec = 1000000000LL + end.tv_nsec - start.tv_nsec; + } else { + temp.tv_sec = end.tv_sec - start.tv_sec; + temp.tv_nsec = end.tv_nsec - start.tv_nsec; + } + + return temp; +} -- cgit v1.2.3 From 3439d886e4d9b79b6b226e70c08d312bd31acbd4 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:16 +0100 Subject: KVM: selftests: Rework debug message printing There were a few problems with the way we output "debug" messages. The first is that we used DEBUG() which is defined when NDEBUG is not defined, but NDEBUG will never be defined for kselftests because it relies too much on assert(). The next is that most of the DEBUG() messages were actually "info" messages, which users may want to turn off if they just want a silent test that either completes or asserts. Finally, a debug message output from a library function, and thus for all tests, was annoying when its information wasn't interesting for a test. Rework these messages so debug messages only output when DEBUG is defined and info messages output unless QUIET is defined. Also name the functions pr_debug and pr_info and make sure that when they're disabled we eat all the inputs. The later avoids unused variable warnings when the variables were only defined for the purpose of printing. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 54 +++++++++++----------- tools/testing/selftests/kvm/dirty_log_test.c | 16 ++++--- tools/testing/selftests/kvm/include/kvm_util.h | 6 --- tools/testing/selftests/kvm/include/test_util.h | 13 ++++++ .../testing/selftests/kvm/lib/aarch64/processor.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 7 +-- 6 files changed, 54 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 8cdb8871e4d8..c1e326d3ed7f 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -36,15 +36,15 @@ #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ #ifdef PRINT_PER_PAGE_UPDATES -#define PER_PAGE_DEBUG(...) DEBUG(__VA_ARGS__) +#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else -#define PER_PAGE_DEBUG(...) +#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) #endif #ifdef PRINT_PER_VCPU_UPDATES -#define PER_VCPU_DEBUG(...) DEBUG(__VA_ARGS__) +#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) #else -#define PER_VCPU_DEBUG(...) +#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif #define MAX_VCPUS 512 @@ -165,6 +165,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, PTES_PER_4K_PT; pages = vm_adjust_num_guest_pages(mode, pages); + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = _vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ @@ -192,8 +194,8 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) r = ioctl(uffd, UFFDIO_COPY, ©); if (r == -1) { - DEBUG("Failed Paged in 0x%lx from thread %d with errno: %d\n", - addr, tid, errno); + pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", + addr, tid, errno); return r; } @@ -241,19 +243,19 @@ static void *uffd_handler_thread_fn(void *arg) r = poll(pollfd, 2, -1); switch (r) { case -1: - DEBUG("poll err"); + pr_info("poll err"); continue; case 0: continue; case 1: break; default: - DEBUG("Polling uffd returned %d", r); + pr_info("Polling uffd returned %d", r); return NULL; } if (pollfd[0].revents & POLLERR) { - DEBUG("uffd revents has POLLERR"); + pr_info("uffd revents has POLLERR"); return NULL; } @@ -271,13 +273,12 @@ static void *uffd_handler_thread_fn(void *arg) if (r == -1) { if (errno == EAGAIN) continue; - DEBUG("Read of uffd gor errno %d", errno); + pr_info("Read of uffd gor errno %d", errno); return NULL; } if (r != sizeof(msg)) { - DEBUG("Read on uffd returned unexpected size: %d bytes", - r); + pr_info("Read on uffd returned unexpected size: %d bytes", r); return NULL; } @@ -315,14 +316,14 @@ static int setup_demand_paging(struct kvm_vm *vm, uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1) { - DEBUG("uffd creation failed\n"); + pr_info("uffd creation failed\n"); return -1; } uffdio_api.api = UFFD_API; uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { - DEBUG("ioctl uffdio_api failed\n"); + pr_info("ioctl uffdio_api failed\n"); return -1; } @@ -330,13 +331,13 @@ static int setup_demand_paging(struct kvm_vm *vm, uffdio_register.range.len = len; uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { - DEBUG("ioctl uffdio_register failed\n"); + pr_info("ioctl uffdio_register failed\n"); return -1; } if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != UFFD_API_RANGE_IOCTLS) { - DEBUG("unexpected userfaultfd ioctl set\n"); + pr_info("unexpected userfaultfd ioctl set\n"); return -1; } @@ -404,8 +405,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, guest_test_phys_mem &= ~((1 << 20) - 1); #endif - DEBUG("guest physical test memory offset: 0x%lx\n", - guest_test_phys_mem); + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add an extra memory slot for testing demand paging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -486,7 +486,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, sync_global_to_guest(vm, guest_page_size); sync_global_to_guest(vm, vcpu_args); - DEBUG("Finished creating vCPUs and starting uffd threads\n"); + pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); @@ -495,7 +495,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &vcpu_args[vcpu_id]); } - DEBUG("Started all vCPUs\n"); + pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { @@ -503,7 +503,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - DEBUG("All vCPU threads joined\n"); + pr_info("All vCPU threads joined\n"); clock_gettime(CLOCK_MONOTONIC, &end); @@ -519,12 +519,12 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - DEBUG("Total guest execution time: %lld.%.9lds\n", - (long long)(timespec_diff(start, end).tv_sec), - timespec_diff(start, end).tv_nsec); - DEBUG("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)timespec_diff(start, end).tv_sec + - (double)timespec_diff(start, end).tv_nsec / 100000000.0)); + pr_info("Total guest execution time: %lld.%.9lds\n", + (long long)(timespec_diff(start, end).tv_sec), + timespec_diff(start, end).tv_nsec); + pr_info("Overall demand paging rate: %f pgs/sec\n", + guest_num_pages / ((double)timespec_diff(start, end).tv_sec + + (double)timespec_diff(start, end).tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index edc5c071bf02..a723333b138a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -173,7 +173,7 @@ static void *vcpu_worker(void *data) } } - DEBUG("Dirtied %"PRIu64" pages\n", pages_count); + pr_info("Dirtied %"PRIu64" pages\n", pages_count); return NULL; } @@ -251,6 +251,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ @@ -310,7 +312,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, guest_test_phys_mem &= ~((1 << 20) - 1); #endif - DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); @@ -375,9 +377,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_quit = true; pthread_join(vcpu_thread, NULL); - DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " + "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, + host_track_next_count); free(bmap); free(host_bmap_track); @@ -491,8 +493,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); TEST_ASSERT(interval > 0, "Interval must be greater than zero"); - DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", + iterations, interval); srandom(time(0)); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 1dc13bfa88b7..bc7c67913fe0 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,12 +24,6 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ -#ifndef NDEBUG -#define DEBUG(...) printf(__VA_ARGS__); -#else -#define DEBUG(...) -#endif - /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 920328ca5f7e..c921ea719ae0 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,6 +19,19 @@ #include #include "kselftest.h" +static inline int _no_printf(const char *format, ...) { return 0; } + +#ifdef DEBUG +#define pr_debug(...) printf(__VA_ARGS__) +#else +#define pr_debug(...) _no_printf(__VA_ARGS__) +#endif +#ifndef QUIET +#define pr_info(...) printf(__VA_ARGS__) +#else +#define pr_info(...) _no_printf(__VA_ARGS__) +#endif + ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); int test_seq_read(const char *path, char **bufp, size_t *sizep); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 216d802479dd..ba2ff3241781 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -186,7 +186,7 @@ unmapped_gva: static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) { -#ifdef DEBUG_VM +#ifdef DEBUG static const char * const type[] = { "", "pud", "pmd", "pte" }; uint64_t pte, *ptep; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 67f5dc9a6a32..9e784c5ccc0a 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -155,7 +155,8 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__, + vm_guest_mode_string(mode), phy_pages, perm); vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -193,8 +194,8 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); TEST_ASSERT(vm->va_bits == 48, "Linear address width " "(%d bits) not supported", vm->va_bits); - DEBUG("Guest physical address width detected: %d\n", - vm->pa_bits); + pr_debug("Guest physical address width detected: %d\n", + vm->pa_bits); vm->pgtable_levels = 4; #else TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " -- cgit v1.2.3 From 244c6b6df99b7a7ce3c9997858c3c8fd3c800421 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 14 Feb 2020 15:59:17 +0100 Subject: KVM: selftests: Convert some printf's to pr_info's We leave some printf's because they inform the user the test is being skipped. QUIET should not disable those. We also leave the printf's used for help text. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/kvm_create_max_vcpus.c | 8 ++++---- tools/testing/selftests/kvm/s390x/resets.c | 6 +++--- tools/testing/selftests/kvm/x86_64/mmio_warning_test.c | 2 +- tools/testing/selftests/kvm/x86_64/smm_test.c | 2 +- tools/testing/selftests/kvm/x86_64/state_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/xss_msr_test.c | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 6f38c3dc0d56..0299cd81b8ba 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -24,8 +24,8 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) struct kvm_vm *vm; int i; - printf("Testing creating %d vCPUs, with IDs %d...%d.\n", - num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); + pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n", + num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); @@ -41,8 +41,8 @@ int main(int argc, char *argv[]) int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); - printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); + pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); + pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 1485bc6c8999..c59db2c95e9e 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -134,7 +134,7 @@ static void inject_irq(int cpu_id) static void test_normal(void) { - printf("Testing normal reset\n"); + pr_info("Testing normal reset\n"); /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); @@ -151,7 +151,7 @@ static void test_normal(void) static void test_initial(void) { - printf("Testing initial reset\n"); + pr_info("Testing initial reset\n"); vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); regs = &run->s.regs; @@ -168,7 +168,7 @@ static void test_initial(void) static void test_clear(void) { - printf("Testing clear reset\n"); + pr_info("Testing clear reset\n"); vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); regs = &run->s.regs; diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index 00bb97d76000..5350c2d6f736 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -44,7 +44,7 @@ void *thr(void *arg) struct kvm_run *run = tc->run; res = ioctl(kvmcpu, KVM_RUN, 0); - printf("ret1=%d exit_reason=%d suberror=%d\n", + pr_info("ret1=%d exit_reason=%d suberror=%d\n", res, run->exit_reason, run->internal.suberror); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 8c063646f2a0..8230b6bc6b8f 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { - printf("will skip SMM test with VMX enabled\n"); + pr_info("will skip SMM test with VMX enabled\n"); vcpu_args_set(vm, VCPU_ID, 1, 0); } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 3ab5ec3da9f4..9d2daffd6110 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]) vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { - printf("will skip nested state checks\n"); + pr_info("will skip nested state checks\n"); vcpu_args_set(vm, VCPU_ID, 1, 0); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 69e482a95c47..64f7cb81f28d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -121,8 +121,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) static void report(int64_t val) { - printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", - val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); + pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); } int main(int argc, char *argv[]) diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index 851ea81b9d9f..fc8328d8d5b0 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES); } if (!xss_supported) { - printf("IA32_XSS is not supported by the vCPU.\n"); + printf("IA32_XSS is not supported by the vCPU, skipping test\n"); exit(KSFT_SKIP); } -- cgit v1.2.3 From 13e48aa9429d1be05ecf8b9eefb212ac58f3f704 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:33 -0800 Subject: KVM: selftests: Add test for KVM_SET_USER_MEMORY_REGION Add a KVM selftest to test moving the base gfn of a userspace memory region. Although the basic concept of moving memory regions is not x86 specific, the assumptions regarding large pages and MMIO shenanigans used to verify the correctness make this x86_64 only for the time being. Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 30 +++++ .../selftests/kvm/x86_64/set_memory_region_test.c | 142 +++++++++++++++++++++ 5 files changed, 175 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/set_memory_region_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 9619d96e15c4..0abf0a8f00d5 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test +/x86_64/set_memory_region_test /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 1bda24d30b3a..a871184ebb6d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bc7c67913fe0..707b44805149 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -94,6 +94,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, uint32_t data_memslot, uint32_t pgd_memslot); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9e784c5ccc0a..69a28a9211b4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -759,6 +759,36 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } +/* + * VM Memory Region Move + * + * Input Args: + * vm - Virtual Machine + * slot - Slot of the memory region to move + * flags - Starting guest physical address + * + * Output Args: None + * + * Return: None + * + * Change the gpa of a memory region. + */ +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) +{ + struct userspace_mem_region *region; + int ret; + + region = memslot2region(vm, slot); + + region->region.guest_phys_addr = new_gpa; + + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" + "ret: %i errno: %i slot: %u flags: 0x%x", + ret, errno, slot, new_gpa); +} + /* * VCPU mmap Size * diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c new file mode 100644 index 000000000000..125aeab59ab6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#define VCPU_ID 0 + +/* + * Somewhat arbitrary location and slot, intended to not overlap anything. The + * location and size are specifically 2mb sized/aligned so that the initial + * region corresponds to exactly one large page. + */ +#define MEM_REGION_GPA 0xc0000000 +#define MEM_REGION_SIZE 0x200000 +#define MEM_REGION_SLOT 10 + +static void guest_code(void) +{ + uint64_t val; + + do { + val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA)); + } while (!val); + + if (val != 1) + ucall(UCALL_ABORT, 1, val); + + GUEST_DONE(); +} + +static void *vcpu_worker(void *data) +{ + struct kvm_vm *vm = data; + struct kvm_run *run; + struct ucall uc; + uint64_t cmd; + + /* + * Loop until the guest is done. Re-enter the guest on all MMIO exits, + * which will occur if the guest attempts to access a memslot while it + * is being moved. + */ + run = vcpu_state(vm, VCPU_ID); + do { + vcpu_run(vm, VCPU_ID); + } while (run->exit_reason == KVM_EXIT_MMIO); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason = %d", run->exit_reason); + + cmd = get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %llu", + uc.args[0]); + return NULL; +} + +static void test_move_memory_region(void) +{ + pthread_t vcpu_thread; + struct kvm_vm *vm; + uint64_t *hva; + uint64_t gpa; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / getpagesize(), 0); + + /* + * Allocate and map two pages so that the GPA accessed by guest_code() + * stays valid across the memslot move. + */ + gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2 * 4096, 0); + + /* Ditto for the host mapping so that both pages can be zeroed. */ + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, 2 * 4096); + + pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + + /* Ensure the guest thread is spun up. */ + usleep(100000); + + /* + * Shift the region's base GPA. The guest should not see "2" as the + * hva->gpa translation is misaligned, i.e. the guest is accessing a + * different host pfn. + */ + vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096); + WRITE_ONCE(*hva, 2); + + usleep(100000); + + /* + * Note, value in memory needs to be changed *before* restoring the + * memslot, else the guest could race the update and see "2". + */ + WRITE_ONCE(*hva, 1); + + /* Restore the original base, the guest should see "1". */ + vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA); + + pthread_join(vcpu_thread, NULL); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + int i, loops; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (argc > 1) + loops = atoi(argv[1]); + else + loops = 10; + + for (i = 0; i < loops; i++) + test_move_memory_region(); + + return 0; +} -- cgit v1.2.3 From 23581ea8ceffb3d2896325e273b4708c3aeae375 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 24 Feb 2020 17:10:49 +0100 Subject: KVM: selftests: Fix unknown ucall command asserts The TEST_ASSERT in x86_64/platform_info_test.c would have print 'ucall' instead of 'uc.cmd'. Also fix all uc.cmd format types. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 2 +- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 2 +- tools/testing/selftests/kvm/x86_64/platform_info_test.c | 3 +-- tools/testing/selftests/kvm/x86_64/state_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 2 +- 7 files changed, 7 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 63cc9c3f5ab6..003d1422705a 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 92915e6408e7..185226c39c03 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } /* UCALL_SYNC is handled here. */ diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index f9334bd3cce9..54a960ff63aa 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -58,8 +58,7 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm) exit_reason_str(run->exit_reason)); get_ucall(vm, VCPU_ID, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %u\n", - ucall); + "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 9d2daffd6110..164774206170 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -160,7 +160,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } /* UCALL_SYNC is handled here. */ diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 5dfb53546a26..cc17a3d67e1f 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(false, "%s", (const char *)uc.args[0]); /* NOT REACHED */ default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index a223a6401258..fe0734d9ef75 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]) done = true; break; default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 64f7cb81f28d..5f46ffeedbf0 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -158,7 +158,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); } } -- cgit v1.2.3 From e743664bea8e04d2735b958fe912e7cce3ab350f Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Tue, 3 Mar 2020 16:07:10 +0800 Subject: kvm: selftests: Support dirty log initial-all-set test Since the new capability KVM_DIRTY_LOG_INITIALLY_SET of KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 has been introduced, tweak the clear_dirty_log_test to use it. Signed-off-by: Jay Zhou Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/clear_dirty_log_test.c | 4 ++++ tools/testing/selftests/kvm/dirty_log_test.c | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c index 749336937d37..11672ec6f74e 100644 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c @@ -1,2 +1,6 @@ #define USE_CLEAR_DIRTY_LOG +#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) +#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) #include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index a723333b138a..518a94a7a8b5 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -265,6 +265,10 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -321,7 +325,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_enable_cap cap = {}; cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = 1; + cap.args[0] = dirty_log_manual_caps; vm_enable_cap(vm, &cap); #endif @@ -433,10 +437,15 @@ int main(int argc, char *argv[]) int opt, i; #ifdef USE_CLEAR_DIRTY_LOG - if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) { - fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, " + "skipping tests\n"); exit(KSFT_SKIP); } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); #endif #ifdef __x86_64__ -- cgit v1.2.3 From 331b4de9a7e780f9648ced959c08f4d593aa2e7b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 12 Mar 2020 11:40:55 +0100 Subject: KVM: selftests: s390x: Provide additional num-guest-pages adjustment s390 requires 1M aligned guest sizes. Embedding the rounding in vm_adjust_num_guest_pages() allows us to remove it from a few other places. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 4 ---- tools/testing/selftests/kvm/dirty_log_test.c | 5 +---- tools/testing/selftests/kvm/include/kvm_util.h | 8 +++++++- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index c1e326d3ed7f..ae086c5dc118 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -378,10 +378,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); -#ifdef __s390x__ - /* Round up to multiple of 1M (segment size) */ - guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; -#endif /* * If there should be more memory in the guest test region than there * can be pages in the guest, it will definitely cause problems. diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 518a94a7a8b5..8a79f5d6b979 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -296,10 +296,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm_get_page_shift(vm))) + 3; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); -#ifdef __s390x__ - /* Round up to multiple of 1M (segment size) */ - guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; -#endif + host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 707b44805149..ade5a40afbee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -164,7 +164,13 @@ unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_p static inline unsigned int vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) { - return vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; } struct kvm_userspace_memory_region * -- cgit v1.2.3 From 1914f624f5e3adc0493c60b25a861d3c9235fc87 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 10 Mar 2020 10:15:53 +0100 Subject: selftests: KVM: SVM: Add vmcall test to gitignore Add svm_vmcall_test to gitignore list, and realphabetize it. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 0abf0a8f00d5..8bc104d39e78 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,5 +1,5 @@ -/s390x/sync_regs_test /s390x/memop +/s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test /x86_64/hyperv_cpuid @@ -9,6 +9,7 @@ /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test +/x86_64/svm_vmcall_test /x86_64/sync_regs_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test @@ -16,6 +17,6 @@ /x86_64/vmx_tsc_adjust_test /x86_64/xss_msr_test /clear_dirty_log_test +/demand_paging_test /dirty_log_test /kvm_create_max_vcpus -/demand_paging_test -- cgit v1.2.3 From 425936246fbe11728ebd787e9199734f3edc2df4 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 10 Mar 2020 10:15:54 +0100 Subject: KVM: selftests: Share common API documentation Move function documentation comment blocks to the header files in order to avoid duplicating them for each architecture. While at it clean up and fix up the comment blocks. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 100 ++++++++++- .../testing/selftests/kvm/lib/aarch64/processor.c | 17 -- .../testing/selftests/kvm/lib/kvm_util_internal.h | 48 +++++ tools/testing/selftests/kvm/lib/s390x/processor.c | 74 -------- tools/testing/selftests/kvm/lib/x86_64/processor.c | 196 +++++---------------- 5 files changed, 187 insertions(+), 248 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index ade5a40afbee..24f7a93671a2 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,7 +16,8 @@ #include "sparsebit.h" -/* Callers of kvm_util only have an incomplete/opaque description of the +/* + * Callers of kvm_util only have an incomplete/opaque description of the * structure kvm_util is using to maintain the state of a VM. */ struct kvm_vm; @@ -78,6 +79,23 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, uint32_t data_memslot, uint32_t pgd_memslot); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +/* + * VM VCPU Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the current state of the VCPU specified by @vcpuid, within the VM + * given by @vm, to the FILE stream given by @stream. + */ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent); @@ -103,6 +121,22 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); @@ -113,7 +147,27 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num function input registers of the VCPU with @vcpuid, + * per the C calling convention of the architecture, to the values given + * as variable args. Each of the variable args is expected to be of type + * uint64_t. The maximum @num can be is specific to the architecture. + */ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); + void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, @@ -142,15 +196,57 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, const char *exit_reason_str(unsigned int exit_reason); void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t pgd_memslot); + uint32_t memslot); + vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, vm_paddr_t paddr_min, uint32_t memslot); +/* + * Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The size of extra memories to add (this will + * decide how much extra space we will need to + * setup the page tables using memslot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size, void *guest_code); + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index ba2ff3241781..f84270f0e32c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -334,23 +334,6 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code); } - -/* VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first num function input arguments to the values - * given as variable args. Each of the variable args is expected to - * be of type uint64_t. The registers set by this function are r0-r7. - */ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2fce6750b8b3..ca56a0133127 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -53,8 +53,56 @@ struct kvm_vm { }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); + +/* + * Virtual Translation Tables Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps to the FILE stream given by @stream, the contents of all the + * virtual translation tables for the VM given by @vm. + */ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +/* + * Register Dump + * + * Input Args: + * stream - Output FILE stream + * regs - Registers + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the state of the registers given by @regs, to the FILE stream + * given by @stream. + */ void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); + +/* + * System Register Dump + * + * Input Args: + * stream - Output FILE stream + * sregs - System registers + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the state of the system registers given by @sregs, to the FILE stream + * given by @stream. + */ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); struct userspace_mem_region * diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a0b84235c848..8d94961bd046 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -51,22 +51,6 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * gva - VM Virtual Address - * gpa - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a virtual translation for the page - * starting at vaddr to the page starting at paddr. - */ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, uint32_t memslot) { @@ -107,26 +91,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, entry[idx] = gpa; } -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gpa - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Translates the VM virtual address given by gva to a VM physical - * address and then locates the memory region containing the VM - * physical address, within the VM given by vm. When found, the host - * virtual address providing the memory to the vm physical address is - * returned. - * A TEST_ASSERT failure occurs if no region containing translated - * VM virtual address exists. - */ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { int ri, idx; @@ -196,21 +160,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump_region(stream, vm, indent, vm->pgd); } -/* - * Create a VM with reasonable defaults - * - * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The size of extra memories to add (this will - * decide how much extra space we will need to - * setup the page tables using mem slot 0) - * guest_code - The vCPU's entry point - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. - */ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, void *guest_code) { @@ -231,13 +180,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, return vm; } -/* - * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW) - * - * Input Args: - * vcpuid - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point - */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); @@ -269,22 +211,6 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) run->psw_addr = (uintptr_t)guest_code; } -/* VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first num function input arguments to the values - * given as variable args. Each of the variable args is expected to - * be of type uint64_t. The registers set by this function are r2-r6. - */ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 683d3bdb8f6a..7ce067c8a05d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -77,20 +77,6 @@ struct pageTableEntry { uint64_t execute_disable:1; }; -/* Register Dump - * - * Input Args: - * indent - Left margin indent amount - * regs - register - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the registers given by regs, to the FILE stream - * given by steam. - */ void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -115,19 +101,20 @@ void regs_dump(FILE *stream, struct kvm_regs *regs, regs->rip, regs->rflags); } -/* Segment Dump +/* + * Segment Dump * * Input Args: - * indent - Left margin indent amount + * stream - Output FILE stream * segment - KVM segment + * indent - Left margin indent amount * - * Output Args: - * stream - Output FILE stream + * Output Args: None * * Return: None * - * Dumps the state of the KVM segment given by segment, to the FILE stream - * given by steam. + * Dumps the state of the KVM segment given by @segment, to the FILE stream + * given by @stream. */ static void segment_dump(FILE *stream, struct kvm_segment *segment, uint8_t indent) @@ -146,19 +133,20 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment, segment->unusable, segment->padding); } -/* dtable Dump +/* + * dtable Dump * * Input Args: - * indent - Left margin indent amount + * stream - Output FILE stream * dtable - KVM dtable + * indent - Left margin indent amount * - * Output Args: - * stream - Output FILE stream + * Output Args: None * * Return: None * - * Dumps the state of the KVM dtable given by dtable, to the FILE stream - * given by steam. + * Dumps the state of the KVM dtable given by @dtable, to the FILE stream + * given by @stream. */ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, uint8_t indent) @@ -169,20 +157,6 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, dtable->padding[0], dtable->padding[1], dtable->padding[2]); } -/* System Register Dump - * - * Input Args: - * indent - Left margin indent amount - * sregs - System registers - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the system registers given by sregs, to the FILE stream - * given by steam. - */ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) { @@ -240,21 +214,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) } } -/* VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * pgd_memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a virtual translation for the page - * starting at vaddr to the page starting at paddr. - */ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint32_t pgd_memslot) { @@ -326,20 +285,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, pte[index[0]].present = 1; } -/* Virtual Translation Tables Dump - * - * Input Args: - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps to the FILE stream given by stream, the contents of all the - * virtual translation tables for the VM given by vm. - */ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { struct pageMapL4Entry *pml4e, *pml4e_start; @@ -421,7 +366,8 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -/* Set Unusable Segment +/* + * Set Unusable Segment * * Input Args: None * @@ -430,7 +376,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) * * Return: None * - * Sets the segment register pointed to by segp to an unusable state. + * Sets the segment register pointed to by @segp to an unusable state. */ static void kvm_seg_set_unusable(struct kvm_segment *segp) { @@ -460,7 +406,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) } -/* Set Long Mode Flat Kernel Code Segment +/* + * Set Long Mode Flat Kernel Code Segment * * Input Args: * vm - VM whose GDT is being filled, or NULL to only write segp @@ -471,8 +418,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) * * Return: None * - * Sets up the KVM segment pointed to by segp, to be a code segment - * with the selector value given by selector. + * Sets up the KVM segment pointed to by @segp, to be a code segment + * with the selector value given by @selector. */ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, struct kvm_segment *segp) @@ -491,7 +438,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, kvm_seg_fill_gdt_64bit(vm, segp); } -/* Set Long Mode Flat Kernel Data Segment +/* + * Set Long Mode Flat Kernel Data Segment * * Input Args: * vm - VM whose GDT is being filled, or NULL to only write segp @@ -502,8 +450,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, * * Return: None * - * Sets up the KVM segment pointed to by segp, to be a data segment - * with the selector value given by selector. + * Sets up the KVM segment pointed to by @segp, to be a data segment + * with the selector value given by @selector. */ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, struct kvm_segment *segp) @@ -521,24 +469,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, kvm_seg_fill_gdt_64bit(vm, segp); } -/* Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gpa - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Translates the VM virtual address given by gva to a VM physical - * address and then locates the memory region containing the VM - * physical address, within the VM given by vm. When found, the host - * virtual address providing the memory to the vm physical address is returned. - * A TEST_ASSERT failure occurs if no region containing translated - * VM virtual address exists. - */ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint16_t index[4]; @@ -640,12 +570,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.cr3 = vm->pgd; vcpu_sregs_set(vm, vcpuid, &sregs); } -/* Adds a vCPU with reasonable defaults (i.e., a stack) - * - * Input Args: - * vcpuid - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point - */ + void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { struct kvm_mp_state mp_state; @@ -670,7 +595,8 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) vcpu_set_mp_state(vm, vcpuid, &mp_state); } -/* Allocate an instance of struct kvm_cpuid2 +/* + * Allocate an instance of struct kvm_cpuid2 * * Input Args: None * @@ -703,7 +629,8 @@ static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) return cpuid; } -/* KVM Supported CPUID Get +/* + * KVM Supported CPUID Get * * Input Args: None * @@ -735,11 +662,12 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return cpuid; } -/* Locate a cpuid entry. +/* + * Locate a cpuid entry. * * Input Args: - * cpuid: The cpuid. * function: The function of the cpuid entry to find. + * index: The index of the cpuid entry. * * Output Args: None * @@ -766,7 +694,8 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) return entry; } -/* VM VCPU CPUID Set +/* + * VM VCPU CPUID Set * * Input Args: * vm - Virtual Machine @@ -793,20 +722,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm, } -/* Create a VM with reasonable defaults - * - * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The size of extra memories to add (this will - * decide how much extra space we will need to - * setup the page tables using mem slot 0) - * guest_code - The vCPU's entry point - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. - */ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, void *guest_code) { @@ -837,7 +752,8 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, return vm; } -/* VCPU Get MSR +/* + * VCPU Get MSR * * Input Args: * vm - Virtual Machine @@ -869,7 +785,8 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) return buffer.entry.data; } -/* _VCPU Set MSR +/* + * _VCPU Set MSR * * Input Args: * vm - Virtual Machine @@ -902,7 +819,8 @@ int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, return r; } -/* VCPU Set MSR +/* + * VCPU Set MSR * * Input Args: * vm - Virtual Machine @@ -926,22 +844,6 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, " rc: %i errno: %i", r, errno); } -/* VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first num function input arguments to the values - * given as variable args. Each of the variable args is expected to - * be of type uint64_t. - */ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) { va_list ap; @@ -976,22 +878,6 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } -/* - * VM VCPU Dump - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * indent - Left margin indent amount - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the current state of the VCPU specified by vcpuid, within the VM - * given by vm, to the FILE stream given by stream. - */ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) { struct kvm_regs regs; -- cgit v1.2.3 From 53362fe930b2dfa03a61d32af73c7e9a194a401c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:43 -0400 Subject: selftests: KVM: s390: fixup fprintf format error in reset.c value is u64 and not string. Reported-by: Andrew Jones Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/resets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index c59db2c95e9e..bf04d77de16b 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -53,7 +53,7 @@ static void test_one_reg(uint64_t id, uint64_t value) reg.addr = (uintptr_t)&eval_reg; reg.id = id; vcpu_get_reg(vm, VCPU_ID, ®); - TEST_ASSERT(eval_reg == value, "value == %s", value); + TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } static void assert_noirq(void) -- cgit v1.2.3 From 6a46fcf92f063224cb29285287f99b93be8209d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:44 -0400 Subject: selftests: KVM: s390: fix format strings for access reg test acrs are 32 bit and not 64 bit. Reported-by: Andrew Jones Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/sync_regs_test.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index b705637ca14b..70a56580042b 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -42,6 +42,13 @@ static void guest_code(void) " values did not match: 0x%llx, 0x%llx\n", \ left->reg, right->reg) +#define REG_COMPARE32(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%x, 0x%x\n", \ + left->reg, right->reg) + + static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) { int i; @@ -55,7 +62,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) int i; for (i = 0; i < 16; i++) - REG_COMPARE(acrs[i]); + REG_COMPARE32(acrs[i]); for (i = 0; i < 16; i++) REG_COMPARE(crs[i]); @@ -155,7 +162,7 @@ int main(int argc, char *argv[]) "r11 sync regs value incorrect 0x%llx.", run->s.regs.gprs[11]); TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, - "acr0 sync regs value incorrect 0x%llx.", + "acr0 sync regs value incorrect 0x%x.", run->s.regs.acrs[0]); vcpu_regs_get(vm, VCPU_ID, ®s); -- cgit v1.2.3 From d9eaf19ecc12668caf280f3d8e24b22ff5ba716b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 10 Mar 2020 10:15:55 +0100 Subject: KVM: selftests: Enable printf format warnings for TEST_ASSERT Use the format attribute to enable printf format warnings, and then fix them all. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 +- tools/testing/selftests/kvm/include/test_util.h | 3 ++- tools/testing/selftests/kvm/lib/kvm_util.c | 8 ++++---- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 2 +- tools/testing/selftests/kvm/x86_64/set_memory_region_test.c | 3 +-- tools/testing/selftests/kvm/x86_64/state_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c | 3 +-- tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c | 2 +- 10 files changed, 16 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index ae086c5dc118..8d99b6d78f89 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -384,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, */ TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %lx vcpus: %d wss: %lx]\n", + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index c921ea719ae0..07823740227b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -37,7 +37,8 @@ ssize_t test_read(int fd, void *buf, size_t count); int test_seq_read(const char *path, char **bufp, size_t *sizep); void test_assert(bool exp, const char *exp_str, - const char *file, unsigned int line, const char *fmt, ...); + const char *file, unsigned int line, const char *fmt, ...) + __attribute__((format(printf, 5, 6))); #define TEST_ASSERT(e, fmt, ...) \ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 69a28a9211b4..b29c5d338555 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -265,7 +265,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", + " guest_phys_addr: 0x%llx size: 0x%llx", ret, errno, region->region.slot, region->region.flags, region->region.guest_phys_addr, @@ -280,7 +280,7 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args); TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s", - strerror(-ret)); + __func__, strerror(-ret)); } void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, @@ -293,7 +293,7 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s", - strerror(-ret)); + __func__, strerror(-ret)); } /* @@ -785,7 +785,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" - "ret: %i errno: %i slot: %u flags: 0x%x", + "ret: %i errno: %i slot: %u flags: 0x%lx", ret, errno, slot, new_gpa); } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 185226c39c03..464a55217085 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -109,7 +109,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) /* UCALL_SYNC is handled here. */ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 443a2b54645b..3edf3b517f9f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -66,7 +66,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x4000000A), - "function %lx is our of supported range", + "function %x is our of supported range", entry->function); TEST_ASSERT(entry->index == 0, diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c index 125aeab59ab6..f2efaa576794 100644 --- a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c @@ -61,8 +61,7 @@ static void *vcpu_worker(void *data) "Unexpected exit reason = %d", run->exit_reason); cmd = get_ucall(vm, VCPU_ID, &uc); - TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %llu", - uc.args[0]); + TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %lu", uc.args[0]); return NULL; } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 164774206170..a4dc1ee59659 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) /* UCALL_SYNC is handled here. */ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c index e280f68f6365..8cd841ff6305 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -69,8 +69,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, - "Unknown ucall 0x%x.", uc.cmd); + TEST_ASSERT(false, "Unknown ucall 0x%lx.", uc.cmd); } } done: diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index fe0734d9ef75..d9ca948d0b72 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -126,7 +126,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 9ef7fab39d48..7962f2fe575d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -212,7 +212,7 @@ void test_vmx_nested_state(struct kvm_vm *vm) test_nested_state(vm, state); vcpu_nested_state_get(vm, VCPU_ID, state); TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, - "Size must be between %d and %d. The size returned was %d.", + "Size must be between %ld and %d. The size returned was %d.", sizeof(*state), state_sz, state->size); TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull."); TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull."); -- cgit v1.2.3 From d0aac3320d1f15ae2113ddf210945c3686951330 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 10 Mar 2020 10:15:56 +0100 Subject: KVM: selftests: Use consistent message for test skipping Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 4 ++-- tools/testing/selftests/kvm/dirty_log_test.c | 3 +-- tools/testing/selftests/kvm/include/test_util.h | 2 ++ tools/testing/selftests/kvm/lib/assert.c | 6 ++++-- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- tools/testing/selftests/kvm/lib/test_util.c | 12 ++++++++++++ tools/testing/selftests/kvm/lib/x86_64/svm.c | 2 +- tools/testing/selftests/kvm/lib/x86_64/vmx.c | 2 +- tools/testing/selftests/kvm/s390x/memop.c | 2 +- tools/testing/selftests/kvm/s390x/sync_regs_test.c | 2 +- tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 2 +- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 2 +- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 6 ++---- tools/testing/selftests/kvm/x86_64/mmio_warning_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/platform_info_test.c | 3 +-- tools/testing/selftests/kvm/x86_64/sync_regs_test.c | 4 ++-- .../testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c | 2 +- tools/testing/selftests/kvm/x86_64/xss_msr_test.c | 2 +- 18 files changed, 37 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 8d99b6d78f89..c4fc96bd064b 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -660,8 +660,8 @@ int main(int argc, char *argv[]) int main(void) { - printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return KSFT_SKIP; + print_skip("__NR_userfaultfd must be present for userfaultfd test"); + return KSFT_SKIP; } #endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8a79f5d6b979..051791e0f5fb 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -437,8 +437,7 @@ int main(int argc, char *argv[]) dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); if (!dirty_log_manual_caps) { - fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, " - "skipping tests\n"); + print_skip("KVM_CLEAR_DIRTY_LOG not available"); exit(KSFT_SKIP); } dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 07823740227b..1e1487a30402 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -32,6 +32,8 @@ static inline int _no_printf(const char *format, ...) { return 0; } #define pr_info(...) _no_printf(__VA_ARGS__) #endif +void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2))); + ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); int test_seq_read(const char *path, char **bufp, size_t *sizep); diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index d1cf9f6e0e6b..5ebbd0d6b472 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -82,8 +82,10 @@ test_assert(bool exp, const char *exp_str, } va_end(ap); - if (errno == EACCES) - ksft_exit_skip("Access denied - Exiting.\n"); + if (errno == EACCES) { + print_skip("Access denied - Exiting"); + exit(KSFT_SKIP); + } exit(254); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b29c5d338555..aa7697212267 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -92,7 +92,7 @@ static void vm_open(struct kvm_vm *vm, int perm) exit(KSFT_SKIP); if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { - fprintf(stderr, "immediate_exit not available, skipping test\n"); + print_skip("immediate_exit not available"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 1c0d45afdf36..26fb3d73dc74 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "test_util.h" /* @@ -69,3 +70,14 @@ struct timespec timespec_diff(struct timespec start, struct timespec end) return temp; } + +void print_skip(const char *fmt, ...) +{ + va_list ap; + + assert(fmt); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + puts(", skipping test"); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 6e05a8fc3fe0..c42401068373 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -154,7 +154,7 @@ void nested_svm_check_supported(void) kvm_get_supported_cpuid_entry(0x80000001); if (!(entry->ecx & CPUID_SVM)) { - fprintf(stderr, "nested SVM not enabled, skipping test\n"); + print_skip("nested SVM not enabled"); exit(KSFT_SKIP); } } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 7aaa99ca4dbc..ff0a657a42d3 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -381,7 +381,7 @@ void nested_vmx_check_supported(void) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); + print_skip("nested VMX not enabled"); exit(KSFT_SKIP); } } diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9edaa9a134ce..9f49ead380ab 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -40,7 +40,7 @@ int main(int argc, char *argv[]) maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP); if (!maxsize) { - fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n"); + print_skip("CAP_S390_MEM_OP not supported"); exit(KSFT_SKIP); } if (maxsize > sizeof(mem1)) diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 70a56580042b..5731ccf34917 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) cap = kvm_check_cap(KVM_CAP_SYNC_REGS); if (!cap) { - fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n"); + print_skip("CAP_SYNC_REGS not supported"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 003d1422705a..a646843137c7 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) entry = kvm_get_supported_cpuid_entry(1); if (!(entry->ecx & X86_FEATURE_XSAVE)) { - printf("XSAVE feature not supported, skipping test\n"); + print_skip("XSAVE feature not supported"); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 464a55217085..15241dc2ded0 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) if (!kvm_check_cap(KVM_CAP_NESTED_STATE) || !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - printf("capabilities not available, skipping test\n"); + print_skip("capabilities not available"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 3edf3b517f9f..83323f3d7ca0 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -141,8 +141,7 @@ int main(int argc, char *argv[]) rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID); if (!rv) { - fprintf(stderr, - "KVM_CAP_HYPERV_CPUID not supported, skip test\n"); + print_skip("KVM_CAP_HYPERV_CPUID not supported"); exit(KSFT_SKIP); } @@ -160,8 +159,7 @@ int main(int argc, char *argv[]) free(hv_cpuid_entries); if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - fprintf(stderr, - "Enlightened VMCS is unsupported, skip related test\n"); + print_skip("Enlightened VMCS is unsupported"); goto vm_free; } diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index 5350c2d6f736..e6480fd5c4bd 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -93,12 +93,12 @@ int main(void) int warnings_before, warnings_after; if (!is_intel_cpu()) { - printf("Must be run on an Intel CPU, skipping test\n"); + print_skip("Must be run on an Intel CPU"); exit(KSFT_SKIP); } if (vm_is_unrestricted_guest(NULL)) { - printf("Unrestricted guest must be disabled, skipping test\n"); + print_skip("Unrestricted guest must be disabled"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 54a960ff63aa..1e89688cbbbf 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -88,8 +88,7 @@ int main(int argc, char *argv[]) rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO); if (!rv) { - fprintf(stderr, - "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n"); + print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 5c8224256294..d672f0a473f8 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -91,11 +91,11 @@ int main(int argc, char *argv[]) cap = kvm_check_cap(KVM_CAP_SYNC_REGS); if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { - fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + print_skip("KVM_CAP_SYNC_REGS not supported"); exit(KSFT_SKIP); } if ((cap & INVALID_SYNC_FIELD) != 0) { - fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + print_skip("The \"invalid\" field is not invalid"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 7962f2fe575d..54cdefdfb49d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -228,7 +228,7 @@ int main(int argc, char *argv[]) have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { - printf("KVM_CAP_NESTED_STATE not available, skipping test\n"); + print_skip("KVM_CAP_NESTED_STATE not available"); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index fc8328d8d5b0..3529376747c2 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES); } if (!xss_supported) { - printf("IA32_XSS is not supported by the vCPU, skipping test\n"); + print_skip("IA32_XSS is not supported by the vCPU"); exit(KSFT_SKIP); } -- cgit v1.2.3 From beca54702dc694970dd9727dde59cf5f56c4dbd8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 13 Mar 2020 16:56:43 +0100 Subject: KVM: selftests: virt_map should take npages, not size Also correct the comment and prototype for vm_create_default(), as it takes a number of pages, not a size. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 3 +-- tools/testing/selftests/kvm/dirty_log_test.c | 3 +-- tools/testing/selftests/kvm/include/kvm_util.h | 6 +++--- tools/testing/selftests/kvm/lib/kvm_util.c | 10 +++++----- tools/testing/selftests/kvm/x86_64/set_memory_region_test.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 11 +++++------ 6 files changed, 16 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index c4fc96bd064b..d82f7bc060c3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -410,8 +410,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, guest_num_pages, 0); /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, - guest_num_pages * guest_page_size, 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); ucall_init(vm, NULL); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 051791e0f5fb..8f5b590805a4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -334,8 +334,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, KVM_MEM_LOG_DIRTY_PAGES); /* Do mapping for the dirty track memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, - guest_num_pages * guest_page_size, 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 24f7a93671a2..3aa4d1e52284 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -117,7 +117,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, uint32_t data_memslot, uint32_t pgd_memslot); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - size_t size, uint32_t pgd_memslot); + unsigned int npages, uint32_t pgd_memslot); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); @@ -226,7 +226,7 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, * * Input Args: * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The size of extra memories to add (this will + * extra_mem_pages - The number of extra pages to add (this will * decide how much extra space we will need to * setup the page tables using memslot 0) * guest_code - The vCPU's entry point @@ -236,7 +236,7 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, * Return: * Pointer to opaque structure that describes the created VM. */ -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size, +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, void *guest_code); /* diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index aa7697212267..e26917ba25bc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1015,21 +1015,21 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, * vm - Virtual Machine * vaddr - Virtuall address to map * paddr - VM Physical Address - * size - The size of the range to map + * npages - The number of pages to map * pgd_memslot - Memory region slot for new virtual translation tables * * Output Args: None * * Return: None * - * Within the VM given by vm, creates a virtual translation for the - * page range starting at vaddr to the page range starting at paddr. + * Within the VM given by @vm, creates a virtual translation for + * @npages starting at @vaddr to the page range starting at @paddr. */ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - size_t size, uint32_t pgd_memslot) + unsigned int npages, uint32_t pgd_memslot) { size_t page_size = vm->page_size; - size_t npages = size / page_size; + size_t size = npages * page_size; TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c index f2efaa576794..c6691cff4e19 100644 --- a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c @@ -87,7 +87,7 @@ static void test_move_memory_region(void) gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT); TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); - virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2 * 4096, 0); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0); /* Ditto for the host mapping so that both pages can be zeroed. */ hva = addr_gpa2hva(vm, MEM_REGION_GPA); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index d9ca948d0b72..7a3228c80d2d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -21,7 +21,7 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 -#define TEST_MEM_SIZE 3 +#define TEST_MEM_PAGES 3 /* L1 guest test virtual memory offset */ #define GUEST_TEST_MEM 0xc0000000 @@ -91,15 +91,14 @@ int main(int argc, char *argv[]) vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, GUEST_TEST_MEM, TEST_MEM_SLOT_INDEX, - TEST_MEM_SIZE, + TEST_MEM_PAGES, KVM_MEM_LOG_DIRTY_PAGES); /* * Add an identity map for GVA range [0xc0000000, 0xc0002000). This * affects both L1 and L2. However... */ - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, - TEST_MEM_SIZE * 4096, 0); + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0); /* * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to @@ -113,11 +112,11 @@ int main(int argc, char *argv[]) nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); - bmap = bitmap_alloc(TEST_MEM_SIZE); + bmap = bitmap_alloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", -- cgit v1.2.3 From 94c4b76b88d40f9062dc32ff2fff551ae1791c1e Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 13 Mar 2020 16:56:44 +0100 Subject: KVM: selftests: Introduce steal-time test The steal-time test confirms what is reported to the guest as stolen time is consistent with the run_delay reported for the VCPU thread on the host. Both x86_64 and AArch64 have the concept of steal/stolen time so this test is introduced for both architectures. While adding the test we ensure .gitignore has all tests listed (it was missing s390x/resets) and that the Makefile has all tests listed in alphabetical order (not really necessary, but it almost was already...). We also extend the common API with a new num-guest- pages call and a new timespec call. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 2 + tools/testing/selftests/kvm/Makefile | 12 +- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 7 + tools/testing/selftests/kvm/lib/test_util.c | 15 + tools/testing/selftests/kvm/steal_time.c | 352 ++++++++++++++++++++++++ 7 files changed, 385 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/kvm/steal_time.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 8bc104d39e78..16877c3daabf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ /s390x/memop +/s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test @@ -20,3 +21,4 @@ /demand_paging_test /dirty_log_test /kvm_create_max_vcpus +/steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a871184ebb6d..712a2ddd2a27 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -21,28 +21,30 @@ TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test +TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += clear_dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test +TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus +TEST_GEN_PROGS_aarch64 += steal_time TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += dirty_log_test +TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += demand_paging_test +TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 3aa4d1e52284..a99b875f50d2 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -255,6 +255,7 @@ unsigned int vm_get_page_size(struct kvm_vm *vm); unsigned int vm_get_page_shift(struct kvm_vm *vm); unsigned int vm_get_max_gfn(struct kvm_vm *vm); +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); static inline unsigned int diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 1e1487a30402..f556ec5fe47b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -59,5 +59,6 @@ size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_diff(struct timespec start, struct timespec end); +struct timespec timespec_add_ns(struct timespec ts, int64_t ns); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e26917ba25bc..35bd42370c21 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1769,3 +1769,10 @@ vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) return vm_calc_num_pages(num_host_pages, getpageshift(), vm_guest_mode_params[mode].page_shift, false); } + +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) +{ + unsigned int n; + n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); + return vm_adjust_num_guest_pages(mode, n); +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 26fb3d73dc74..ee12c4b9ae05 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -71,6 +71,21 @@ struct timespec timespec_diff(struct timespec start, struct timespec end) return temp; } +struct timespec timespec_add_ns(struct timespec ts, int64_t ns) +{ + struct timespec res; + + res.tv_sec = ts.tv_sec; + res.tv_nsec = ts.tv_nsec + ns; + + if (res.tv_nsec > 1000000000UL) { + res.tv_sec += 1; + res.tv_nsec -= 1000000000UL; + } + + return res; +} + void print_skip(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c new file mode 100644 index 000000000000..f976ac5e896a --- /dev/null +++ b/tools/testing/selftests/kvm/steal_time.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * steal/stolen time test + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define NR_VCPUS 4 +#define ST_GPA_BASE (1 << 30) +#define MIN_RUN_DELAY_NS 200000UL + +static void *st_gva[NR_VCPUS]; +static uint64_t guest_stolen_time[NR_VCPUS]; + +#if defined(__x86_64__) + +/* steal_time must have 64-byte alignment */ +#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63) + +static void check_status(struct kvm_steal_time *st) +{ + GUEST_ASSERT(!(READ_ONCE(st->version) & 1)); + GUEST_ASSERT(READ_ONCE(st->flags) == 0); + GUEST_ASSERT(READ_ONCE(st->preempted) == 0); +} + +static void guest_code(int cpu) +{ + struct kvm_steal_time *st = st_gva[cpu]; + uint32_t version; + + GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + + memset(st, 0, sizeof(*st)); + GUEST_SYNC(0); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + version = READ_ONCE(st->version); + check_status(st); + GUEST_SYNC(1); + + check_status(st); + GUEST_ASSERT(version < READ_ONCE(st->version)); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + check_status(st); + GUEST_DONE(); +} + +static void steal_time_init(struct kvm_vm *vm) +{ + int i; + + if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax & + KVM_FEATURE_STEAL_TIME)) { + print_skip("steal-time not supported"); + exit(KSFT_SKIP); + } + + for (i = 0; i < NR_VCPUS; ++i) { + int ret; + + vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid()); + + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + sync_global_to_guest(vm, st_gva[i]); + + ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK); + TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); + + vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); + } +} + +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]); + int i; + + pr_info("VCPU%d:\n", vcpuid); + pr_info(" steal: %lld\n", st->steal); + pr_info(" version: %d\n", st->version); + pr_info(" flags: %d\n", st->flags); + pr_info(" preempted: %d\n", st->preempted); + pr_info(" u8_pad: "); + for (i = 0; i < 3; ++i) + pr_info("%d", st->u8_pad[i]); + pr_info("\n pad: "); + for (i = 0; i < 11; ++i) + pr_info("%d", st->pad[i]); + pr_info("\n"); +} + +#elif defined(__aarch64__) + +/* PV_TIME_ST must have 64-byte alignment */ +#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) + +#define SMCCC_ARCH_FEATURES 0x80000001 +#define PV_TIME_FEATURES 0xc5000020 +#define PV_TIME_ST 0xc5000021 + +struct st_time { + uint32_t rev; + uint32_t attr; + uint64_t st_time; +}; + +static int64_t smccc(uint32_t func, uint32_t arg) +{ + unsigned long ret; + + asm volatile( + "mov x0, %1\n" + "mov x1, %2\n" + "hvc #0\n" + "mov %0, x0\n" + : "=r" (ret) : "r" (func), "r" (arg) : + "x0", "x1", "x2", "x3"); + + return ret; +} + +static void check_status(struct st_time *st) +{ + GUEST_ASSERT(READ_ONCE(st->rev) == 0); + GUEST_ASSERT(READ_ONCE(st->attr) == 0); +} + +static void guest_code(int cpu) +{ + struct st_time *st; + int64_t status; + + status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); + GUEST_ASSERT(status == 0); + status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES); + GUEST_ASSERT(status == 0); + status = smccc(PV_TIME_FEATURES, PV_TIME_ST); + GUEST_ASSERT(status == 0); + + status = smccc(PV_TIME_ST, 0); + GUEST_ASSERT(status != -1); + GUEST_ASSERT(status == (ulong)st_gva[cpu]); + + st = (struct st_time *)status; + GUEST_SYNC(0); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->st_time); + GUEST_SYNC(1); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->st_time); + GUEST_DONE(); +} + +static void steal_time_init(struct kvm_vm *vm) +{ + struct kvm_device_attr dev = { + .group = KVM_ARM_VCPU_PVTIME_CTRL, + .attr = KVM_ARM_VCPU_PVTIME_IPA, + }; + int i, ret; + + ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev); + if (ret != 0 && errno == ENXIO) { + print_skip("steal-time not supported"); + exit(KSFT_SKIP); + } + + for (i = 0; i < NR_VCPUS; ++i) { + uint64_t st_ipa; + + vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev); + + dev.addr = (uint64_t)&st_ipa; + + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + sync_global_to_guest(vm, st_gva[i]); + + st_ipa = (ulong)st_gva[i] | 1; + ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); + + st_ipa = (ulong)st_gva[i]; + vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); + + ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); + + } +} + +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]); + + pr_info("VCPU%d:\n", vcpuid); + pr_info(" rev: %d\n", st->rev); + pr_info(" attr: %d\n", st->attr); + pr_info(" st_time: %ld\n", st->st_time); +} + +#endif + +static long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + fscanf(fp, "%ld %ld ", &val[0], &val[1]); + fclose(fp); + + return val[1]; +} + +static void *do_steal_time(void *arg) +{ + struct timespec ts, stop; + + clock_gettime(CLOCK_MONOTONIC, &ts); + stop = timespec_add_ns(ts, MIN_RUN_DELAY_NS); + + while (1) { + clock_gettime(CLOCK_MONOTONIC, &ts); + if (ts.tv_sec > stop.tv_sec || ts.tv_nsec >= stop.tv_nsec) + break; + } + + return NULL; +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct ucall uc; + + vcpu_args_set(vm, vcpuid, 1, vcpuid); + + vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } +} + +int main(int ac, char **av) +{ + struct kvm_vm *vm; + pthread_attr_t attr; + pthread_t thread; + cpu_set_t cpuset; + unsigned int gpages; + long stolen_time; + long run_delay; + bool verbose; + int i; + + verbose = ac > 1 && (!strncmp(av[1], "-v", 3) || !strncmp(av[1], "--verbose", 10)); + + /* Set CPU affinity so we can force preemption of the VCPU */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_attr_init(&attr); + pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */ + vm = vm_create_default(0, 0, guest_code); + gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); + virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0); + ucall_init(vm, NULL); + + /* Add the rest of the VCPUs */ + for (i = 1; i < NR_VCPUS; ++i) + vm_vcpu_add_default(vm, i, guest_code); + + steal_time_init(vm); + + /* Run test on each VCPU */ + for (i = 0; i < NR_VCPUS; ++i) { + /* First VCPU run initializes steal-time */ + run_vcpu(vm, i); + + /* Second VCPU run, expect guest stolen time to be <= run_delay */ + run_vcpu(vm, i); + sync_global_from_guest(vm, guest_stolen_time[i]); + stolen_time = guest_stolen_time[i]; + run_delay = get_run_delay(); + TEST_ASSERT(stolen_time <= run_delay, + "Expected stolen time <= %ld, got %ld", + run_delay, stolen_time); + + /* Steal time from the VCPU. The steal time thread has the same CPU affinity as the VCPUs. */ + run_delay = get_run_delay(); + pthread_create(&thread, &attr, do_steal_time, NULL); + do + pthread_yield(); + while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); + pthread_join(thread, NULL); + run_delay = get_run_delay() - run_delay; + TEST_ASSERT(run_delay >= MIN_RUN_DELAY_NS, + "Expected run_delay >= %ld, got %ld", + MIN_RUN_DELAY_NS, run_delay); + + /* Run VCPU again to confirm stolen time is consistent with run_delay */ + run_vcpu(vm, i); + sync_global_from_guest(vm, guest_stolen_time[i]); + stolen_time = guest_stolen_time[i] - stolen_time; + TEST_ASSERT(stolen_time >= run_delay, + "Expected stolen time >= %ld, got %ld", + run_delay, stolen_time); + + if (verbose) { + pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i, + guest_stolen_time[i], stolen_time); + if (stolen_time == run_delay) + pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)"); + pr_info("\n"); + steal_time_dump(vm, i); + } + } + + return 0; +} -- cgit v1.2.3 From 41cbed5b07b5f6ca4ae567059ae7f0ffad1fd454 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:40 -0400 Subject: selftests: KVM: s390: fix early guest crash The guest crashes very early due to changes in the control registers used by dynamic address translation. Let us use different registers that will not crash the guest. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/resets.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index bf04d77de16b..83624d0b1453 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -23,25 +23,24 @@ struct kvm_run *run; struct kvm_sync_regs *regs; static uint64_t regs_null[16]; -static uint64_t crs[16] = { 0x40000ULL, - 0x42000ULL, - 0, 0, 0, 0, 0, - 0x43000ULL, - 0, 0, 0, 0, 0, - 0x44000ULL, - 0, 0 -}; - static void guest_code_initial(void) { - /* Round toward 0 */ - uint32_t fpc = 0x11; + /* set several CRs to "safe" value */ + unsigned long cr2_59 = 0x10; /* enable guarded storage */ + unsigned long cr8_63 = 0x1; /* monitor mask = 1 */ + unsigned long cr10 = 1; /* PER START */ + unsigned long cr11 = -1; /* PER END */ + /* Dirty registers */ asm volatile ( - " lctlg 0,15,%0\n" - " sfpc %1\n" - : : "Q" (crs), "d" (fpc)); + " lghi 2,0x11\n" /* Round toward 0 */ + " sfpc 2\n" /* set fpc to !=0 */ + " lctlg 2,2,%0\n" + " lctlg 8,8,%1\n" + " lctlg 10,10,%2\n" + " lctlg 11,11,%3\n" + : : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11) : "2"); GUEST_SYNC(0); } -- cgit v1.2.3 From b0435a12a6d3839dd8190982d11aee825bca4250 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:41 -0400 Subject: selftests: KVM: s390: test more register variants for the reset ioctl We should not only test the oneregs or the get_(x)regs interfaces but also the sync_regs. Those are usually the canonical place for register content. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/resets.c | 50 ++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 83624d0b1453..5c53d5d439ed 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -20,8 +20,8 @@ struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS]; struct kvm_vm *vm; struct kvm_run *run; -struct kvm_sync_regs *regs; -static uint64_t regs_null[16]; +struct kvm_sync_regs *sync_regs; +static uint8_t regs_null[512]; static void guest_code_initial(void) { @@ -86,6 +86,16 @@ static void assert_clear(void) vcpu_fpu_get(vm, VCPU_ID, &fpu); TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); + + /* sync regs */ + TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)), + "gprs0-15 == 0 (sync_regs)"); + + TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)), + "acrs0-15 == 0 (sync_regs)"); + + TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)), + "vrs0-15 == 0 (sync_regs)"); } static void assert_initial(void) @@ -93,12 +103,32 @@ static void assert_initial(void) struct kvm_sregs sregs; struct kvm_fpu fpu; + /* KVM_GET_SREGS */ vcpu_sregs_get(vm, VCPU_ID, &sregs); - TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0"); - TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000"); + TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)"); + TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, + "cr14 == 0xC2000000 (KVM_GET_SREGS)"); TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12), - "cr1-13 == 0"); - TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0"); + "cr1-13 == 0 (KVM_GET_SREGS)"); + TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)"); + + /* sync regs */ + TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL, + "cr14 == 0xC2000000 (sync_regs)"); + TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12), + "cr1-13 == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)"); + + /* kvm_run */ + TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)"); + TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)"); vcpu_fpu_get(vm, VCPU_ID, &fpu); TEST_ASSERT(!fpu.fpc, "fpc == 0"); @@ -113,6 +143,8 @@ static void assert_initial(void) static void assert_normal(void) { test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); + TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID, + "pft == 0xff..... (sync_regs)"); assert_noirq(); } @@ -137,7 +169,7 @@ static void test_normal(void) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); - regs = &run->s.regs; + sync_regs = &run->s.regs; vcpu_run(vm, VCPU_ID); @@ -153,7 +185,7 @@ static void test_initial(void) pr_info("Testing initial reset\n"); vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); - regs = &run->s.regs; + sync_regs = &run->s.regs; vcpu_run(vm, VCPU_ID); @@ -170,7 +202,7 @@ static void test_clear(void) pr_info("Testing clear reset\n"); vm = vm_create_default(VCPU_ID, 0, guest_code_initial); run = vcpu_state(vm, VCPU_ID); - regs = &run->s.regs; + sync_regs = &run->s.regs; vcpu_run(vm, VCPU_ID); -- cgit v1.2.3 From 3203a01737af5c929854940cc72425bef553d403 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:42 -0400 Subject: selftests: KVM: s390: check for registers to NOT change on reset Normal reset and initial CPU reset do not clear all registers. Add a test that those registers are NOT changed. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/resets.c | 55 ++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 5c53d5d439ed..b143db6d8693 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -40,8 +40,22 @@ static void guest_code_initial(void) " lctlg 8,8,%1\n" " lctlg 10,10,%2\n" " lctlg 11,11,%3\n" - : : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11) : "2"); - GUEST_SYNC(0); + /* now clobber some general purpose regs */ + " llihh 0,0xffff\n" + " llihl 1,0x5555\n" + " llilh 2,0xaaaa\n" + " llill 3,0x0000\n" + /* now clobber a floating point reg */ + " lghi 4,0x1\n" + " cdgbr 0,4\n" + /* now clobber an access reg */ + " sar 9,4\n" + /* We embed diag 501 here to control register content */ + " diag 0,0,0x501\n" + : + : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11) + /* no clobber list as this should not return */ + ); } static void test_one_reg(uint64_t id, uint64_t value) @@ -98,6 +112,21 @@ static void assert_clear(void) "vrs0-15 == 0 (sync_regs)"); } +static void assert_initial_noclear(void) +{ + TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL, + "gpr0 == 0xffff000000000000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL, + "gpr1 == 0x0000555500000000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL, + "gpr2 == 0x00000000aaaa0000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL, + "gpr3 == 0x0000000000000000 (sync_regs)"); + TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL, + "fpr0 == 0f1 (sync_regs)"); + TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)"); +} + static void assert_initial(void) { struct kvm_sregs sregs; @@ -140,6 +169,14 @@ static void assert_initial(void) test_one_reg(KVM_REG_S390_CLOCK_COMP, 0); } +static void assert_normal_noclear(void) +{ + TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)"); +} + static void assert_normal(void) { test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); @@ -176,7 +213,13 @@ static void test_normal(void) inject_irq(VCPU_ID); vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0); + + /* must clears */ assert_normal(); + /* must not clears */ + assert_normal_noclear(); + assert_initial_noclear(); + kvm_vm_free(vm); } @@ -192,8 +235,13 @@ static void test_initial(void) inject_irq(VCPU_ID); vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0); + + /* must clears */ assert_normal(); assert_initial(); + /* must not clears */ + assert_initial_noclear(); + kvm_vm_free(vm); } @@ -209,9 +257,12 @@ static void test_clear(void) inject_irq(VCPU_ID); vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0); + + /* must clears */ assert_normal(); assert_initial(); assert_clear(); + kvm_vm_free(vm); } -- cgit v1.2.3 From a46f8a63cde8d4fee05693bc5c566c1374d0baaa Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Mon, 9 Mar 2020 23:50:58 -0300 Subject: selftests: kvm: Introduce the TEST_FAIL macro Some tests/utilities use the TEST_ASSERT(false, ...) pattern to indicate a failure and stop execution. This change introduces the TEST_FAIL macro which is a wrap around TEST_ASSERT(false, ...) and so provides a direct alternative for failing a test. Signed-off-by: Wainer dos Santos Moschetta Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/test_util.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index f556ec5fe47b..f588ad1403f1 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -55,6 +55,9 @@ void test_assert(bool exp, const char *exp_str, #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ } while (0) +#define TEST_FAIL(fmt, ...) \ + TEST_ASSERT(false, fmt, ##__VA_ARGS__) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); -- cgit v1.2.3 From 352be2c539d01ae050b5fa3cbd90978ff19f1fc6 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Mon, 9 Mar 2020 23:50:59 -0300 Subject: selftests: kvm: Uses TEST_FAIL in tests/utilities Changed all tests and utilities to use TEST_FAIL macro instead of TEST_ASSERT(false,...). Signed-off-by: Wainer dos Santos Moschetta Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 7 +++---- tools/testing/selftests/kvm/lib/aarch64/processor.c | 17 ++++++++--------- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 2 +- tools/testing/selftests/kvm/lib/io.c | 12 ++++++------ tools/testing/selftests/kvm/lib/kvm_util.c | 21 +++++++++------------ tools/testing/selftests/kvm/lib/x86_64/processor.c | 5 ++--- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 6 +++--- tools/testing/selftests/kvm/x86_64/state_test.c | 6 +++--- .../testing/selftests/kvm/x86_64/svm_vmcall_test.c | 5 ++--- .../kvm/x86_64/vmx_close_while_nested_test.c | 4 ++-- .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 6 +++--- .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 4 ++-- 13 files changed, 46 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8f5b590805a4..752ec158ac59 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -166,10 +166,9 @@ static void *vcpu_worker(void *data) pages_count += TEST_PAGES_PER_LOOP; generate_random_array(guest_array, TEST_PAGES_PER_LOOP); } else { - TEST_ASSERT(false, - "Invalid guest sync status: " - "exit_reason=%s\n", - exit_reason_str(run->exit_reason)); + TEST_FAIL("Invalid guest sync status: " + "exit_reason=%s\n", + exit_reason_str(run->exit_reason)); } } diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index f84270f0e32c..2afa6618b396 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -130,7 +130,7 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; break; default: - TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + TEST_FAIL("Page table levels must be 2, 3, or 4"); } *ptep = paddr | 3; @@ -173,14 +173,13 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) goto unmapped_gva; break; default: - TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + TEST_FAIL("Page table levels must be 2, 3, or 4"); } return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); unmapped_gva: - TEST_ASSERT(false, "No mapping for vm virtual address, " - "gva: 0x%lx", gva); + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); exit(1); } @@ -262,11 +261,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini switch (vm->mode) { case VM_MODE_P52V48_4K: - TEST_ASSERT(false, "AArch64 does not support 4K sized pages " - "with 52-bit physical address ranges"); + TEST_FAIL("AArch64 does not support 4K sized pages " + "with 52-bit physical address ranges"); case VM_MODE_PXXV48_4K: - TEST_ASSERT(false, "AArch64 does not support 4K sized pages " - "with ANY-bit physical address ranges"); + TEST_FAIL("AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ @@ -288,7 +287,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ break; default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 6cd91970fbad..c8e0ec20d3bf 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -62,7 +62,7 @@ void ucall_init(struct kvm_vm *vm, void *arg) if (ucall_mmio_init(vm, start + offset)) return; } - TEST_ASSERT(false, "Can't find a ucall mmio address"); + TEST_FAIL("Can't find a ucall mmio address"); } void ucall_uninit(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c index eaf351cc7e7f..fedb2a741f0b 100644 --- a/tools/testing/selftests/kvm/lib/io.c +++ b/tools/testing/selftests/kvm/lib/io.c @@ -61,9 +61,9 @@ ssize_t test_write(int fd, const void *buf, size_t count) continue; case 0: - TEST_ASSERT(false, "Unexpected EOF,\n" - " rc: %zi num_written: %zi num_left: %zu", - rc, num_written, num_left); + TEST_FAIL("Unexpected EOF,\n" + " rc: %zi num_written: %zi num_left: %zu", + rc, num_written, num_left); break; default: @@ -138,9 +138,9 @@ ssize_t test_read(int fd, void *buf, size_t count) break; case 0: - TEST_ASSERT(false, "Unexpected EOF,\n" - " rc: %zi num_read: %zi num_left: %zu", - rc, num_read, num_left); + TEST_FAIL("Unexpected EOF,\n" + " rc: %zi num_read: %zi num_left: %zu", + rc, num_read, num_left); break; default: diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 35bd42370c21..0cf98ad59e32 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -198,12 +198,11 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->pa_bits); vm->pgtable_levels = 4; #else - TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " - "non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); #endif break; default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); + TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } #ifdef __aarch64__ @@ -603,7 +602,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region = (struct userspace_mem_region *) userspace_mem_region_find( vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); if (region != NULL) - TEST_ASSERT(false, "overlapping userspace_mem_region already " + TEST_FAIL("overlapping userspace_mem_region already " "exists\n" " requested guest_paddr: 0x%lx npages: 0x%lx " "page_size: 0x%x\n" @@ -619,7 +618,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, break; } if (region != NULL) - TEST_ASSERT(false, "A mem region with the requested slot " + TEST_FAIL("A mem region with the requested slot " "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", @@ -723,7 +722,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) " requested slot: %u\n", memslot); fputs("---- vm dump ----\n", stderr); vm_dump(stderr, vm, 2); - TEST_ASSERT(false, "Mem region not found"); + TEST_FAIL("Mem region not found"); } return region; @@ -841,7 +840,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) /* Confirm a vcpu with the specified id doesn't already exist. */ vcpu = vcpu_find(vm, vcpuid); if (vcpu != NULL) - TEST_ASSERT(false, "vcpu with the specified id " + TEST_FAIL("vcpu with the specified id " "already exists,\n" " requested vcpuid: %u\n" " existing vcpuid: %u state: %p", @@ -934,8 +933,7 @@ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, } while (pgidx_start != 0); no_va_found: - TEST_ASSERT(false, "No vaddr of specified pages available, " - "pages: 0x%lx", pages); + TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); /* NOT REACHED */ return -1; @@ -1070,7 +1068,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) + (gpa - region->region.guest_phys_addr)); } - TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); + TEST_FAIL("No vm physical memory at 0x%lx", gpa); return NULL; } @@ -1104,8 +1102,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) + (hva - (uintptr_t) region->host_mem)); } - TEST_ASSERT(false, "No mapping to a guest physical address, " - "hva: %p", hva); + TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); return -1; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 7ce067c8a05d..f6eb34eaa0d2 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -506,8 +506,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); unmapped_gva: - TEST_ASSERT(false, "No mapping for vm virtual address, " - "gva: 0x%lx", gva); + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); exit(EXIT_FAILURE); } @@ -564,7 +563,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m break; default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } sregs.cr3 = vm->pgd; diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index a646843137c7..140e91901582 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -101,12 +101,12 @@ int main(int argc, char *argv[]) vcpu_sregs_set(vm, VCPU_ID, &sregs); break; case UCALL_ABORT: - TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); break; case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 15241dc2ded0..3ce575af184c 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -109,15 +109,15 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: break; case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } /* UCALL_SYNC is handled here. */ diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index a4dc1ee59659..5b1a016edf55 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -152,15 +152,15 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: break; case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } /* UCALL_SYNC is handled here. */ diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c index 8cd841ff6305..0e1adb4e3199 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -61,15 +61,14 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s", - (const char *)uc.args[0]); + TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ case UCALL_SYNC: break; case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall 0x%lx.", uc.cmd); + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); } } done: diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index cc17a3d67e1f..fe40ade06a49 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -78,10 +78,10 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s", (const char *)uc.args[0]); + TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 7a3228c80d2d..e894a638a155 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -125,8 +125,8 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); /* NOT REACHED */ case UCALL_SYNC: /* @@ -151,7 +151,7 @@ int main(int argc, char *argv[]) done = true; break; default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 5f46ffeedbf0..fbe8417cbc2c 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -150,7 +150,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: - TEST_ASSERT(false, "%s", (const char *)uc.args[0]); + TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ case UCALL_SYNC: report(uc.args[1]); @@ -158,7 +158,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + TEST_FAIL("Unknown ucall %lu", uc.cmd); } } -- cgit v1.2.3 From 7bcf732e74e740fa1a535088e83d4e36f450e4b2 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 9 Mar 2020 16:52:14 +0100 Subject: KVM: selftests: define and use EVMCS_VERSION KVM allows to use revision_id from MSR_IA32_VMX_BASIC as eVMCS revision_id to workaround a bug in genuine Hyper-V (see the comment in nested_vmx_handle_enlightened_vmptrld()), this shouldn't be used by default. Switch to using KVM_EVMCS_VERSION(1). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/evmcs.h | 2 ++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h index 4912d23844bc..d8f4d6bfe05d 100644 --- a/tools/testing/selftests/kvm/include/evmcs.h +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -16,6 +16,8 @@ #define u32 uint32_t #define u64 uint64_t +#define EVMCS_VERSION 1 + extern bool enable_evmcs; struct hv_vp_assist_page { diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index ff0a657a42d3..6f17f69394be 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -191,7 +191,7 @@ bool load_vmcs(struct vmx_pages *vmx) if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, vmx->enlightened_vmcs)) return false; - current_evmcs->revision_id = vmcs_revision(); + current_evmcs->revision_id = EVMCS_VERSION; } return true; -- cgit v1.2.3 From 41b0552aa6931a633589f76981fad7850f3ecfe5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 9 Mar 2020 16:52:15 +0100 Subject: KVM: selftests: test enlightened vmenter with wrong eVMCS version Check that VMfailInvalid happens when eVMCS revision is is invalid. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 3ce575af184c..17da5792c2f7 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -21,10 +21,10 @@ void l2_guest_code(void) { - GUEST_SYNC(6); - GUEST_SYNC(7); + GUEST_SYNC(8); + /* Done, exit to L1 and never come back. */ vmcall(); } @@ -50,12 +50,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_SYNC(5); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + current_evmcs->revision_id = -1u; + GUEST_ASSERT(vmlaunch()); + current_evmcs->revision_id = EVMCS_VERSION; + GUEST_SYNC(6); + GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); - GUEST_SYNC(8); + GUEST_SYNC(9); GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(9); + GUEST_SYNC(10); } void guest_code(struct vmx_pages *vmx_pages) -- cgit v1.2.3 From 6d05a965addbea9c95eed7ab66594fd4fdf33e4c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 9 Mar 2020 16:52:16 +0100 Subject: KVM: selftests: enlightened VMPTRLD with an incorrect GPA Check that guest doesn't hang when an invalid eVMCS GPA is specified. Testing that #UD is injected would probably be better but selftests lack the infrastructure currently. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 17da5792c2f7..e6e62e5e75b2 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -72,6 +72,10 @@ void guest_code(struct vmx_pages *vmx_pages) l1_guest_code(vmx_pages); GUEST_DONE(); + + /* Try enlightened vmptrld with an incorrect GPA */ + evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); + GUEST_ASSERT(vmlaunch()); } int main(int argc, char *argv[]) @@ -120,7 +124,7 @@ int main(int argc, char *argv[]) case UCALL_SYNC: break; case UCALL_DONE: - goto done; + goto part1_done; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } @@ -152,6 +156,10 @@ int main(int argc, char *argv[]) (ulong) regs2.rdi, (ulong) regs2.rsi); } -done: +part1_done: + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Unexpected successful VMEnter with invalid eVMCS pointer!"); + kvm_vm_free(vm); } -- cgit v1.2.3 From 47bf235f324c696395c30541fe4fcf99fcd24188 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:00:09 +0000 Subject: selftests/powerpc: Add tlbie_test in .gitignore The commit identified below added tlbie_test but forgot to add it in .gitignore. Fixes: 93cad5f78995 ("selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/259f9c06ed4563c4fa4fa8ffa652347278d769e7.1582847784.git.christophe.leroy@c-s.fr --- tools/testing/selftests/powerpc/mm/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 0ebeaea22641..97f7922c52c5 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -6,3 +6,4 @@ segv_errors wild_bctr large_vm_fork_separation bad_accesses +tlbie_test -- cgit v1.2.3 From cc9864a7aad9bc952d89f80dd0404a44ca591c2d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 17:18:34 -0700 Subject: selftest/bpf: Fix compilation warning in sockmap_parse_prog.c Remove unused len variable, which causes compilation warnings. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200314001834.3727680-1-andriin@fb.com --- tools/testing/selftests/bpf/progs/sockmap_parse_prog.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index a5c6d5903b22..ca283af80d4e 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -12,7 +12,6 @@ int bpf_prog1(struct __sk_buff *skb) __u32 lport = skb->local_port; __u32 rport = skb->remote_port; __u8 *d = data; - __u32 len = (__u32) data_end - (__u32) data; int err; if (data + 10 > data_end) { -- cgit v1.2.3 From 41078907eef0dd508d0999996d10683ae114c21a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 17:27:43 -0700 Subject: selftests/bpf: Fix nanosleep for real this time Amazingly, some libc implementations don't call __NR_nanosleep syscall from their nanosleep() APIs. Hammer it down with explicit syscall() call and never get back to it again. Also simplify code for timespec initialization. I verified that nanosleep is called w/ printk and in exactly same Linux image that is used in Travis CI. So it should both sleep and call correct syscall. v1->v2: - math is too hard, fix usec -> nsec convertion (Martin); - test_vmlinux has explicit nanosleep() call, convert that one as well. Fixes: 4e1fd25d19e8 ("selftests/bpf: Fix usleep() implementation") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200314002743.3782677-1-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/vmlinux.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 04939eda1325..72310cfc6474 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -11,7 +11,7 @@ static void nsleep() { struct timespec ts = { .tv_nsec = MY_TV_NSEC }; - (void)nanosleep(&ts, NULL); + (void)syscall(__NR_nanosleep, &ts, NULL); } void test_vmlinux(void) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index f85a06512541..dc12fd0de1c2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -35,16 +35,12 @@ struct prog_test_def { */ int usleep(useconds_t usec) { - struct timespec ts; - - if (usec > 999999) { - ts.tv_sec = usec / 1000000; - ts.tv_nsec = usec % 1000000; - } else { - ts.tv_sec = 0; - ts.tv_nsec = usec; - } - return nanosleep(&ts, NULL); + struct timespec ts = { + .tv_sec = usec / 1000000, + .tv_nsec = (usec % 1000000) * 1000, + }; + + return syscall(__NR_nanosleep, &ts, NULL); } static bool should_run(struct test_selector *sel, int num, const char *name) -- cgit v1.2.3 From 94c2f50b61258381abcd3a33fd5f2bc87e26df11 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 18:39:30 -0700 Subject: selftests/bpf: Fix race in tcp_rtt test Previous attempt to make tcp_rtt more robust introduced a new race, in which server_done might be set to true before server can actually accept any connection. Fix this by unconditionally waiting for accept(). Given socket is non-blocking, if there are any problems with client side, it should eventually close listening FD and let server thread exit with failure. Fixes: 4cd729fa022c ("selftests/bpf: Make tcp_rtt test more robust to failures") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200314013932.4035712-1-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index e08f6bb17700..e56b52ab41da 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -226,7 +226,7 @@ static void *server_thread(void *arg) return ERR_PTR(err); } - while (!server_done) { + while (true) { client_fd = accept(fd, (struct sockaddr *)&addr, &len); if (client_fd == -1 && errno == EAGAIN) { usleep(50); @@ -272,7 +272,7 @@ void test_tcp_rtt(void) CHECK_FAIL(run_test(cgroup_fd, server_fd)); server_done = true; - pthread_join(tid, &server_res); + CHECK_FAIL(pthread_join(tid, &server_res)); CHECK_FAIL(IS_ERR(server_res)); close_server_fd: -- cgit v1.2.3 From fc32490bff855a539d253c8a52c5a1ba51d1325a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 18:39:31 -0700 Subject: selftests/bpf: Fix test_progs's parsing of test numbers When specifying disjoint set of tests, test_progs doesn't set skipped test's array elements to false. This leads to spurious execution of tests that should have been skipped. Fix it by explicitly initializing them to false. Fixes: 3a516a0a3a7b ("selftests/bpf: add sub-tests support for test_progs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200314013932.4035712-2-andriin@fb.com --- tools/testing/selftests/bpf/test_progs.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index dc12fd0de1c2..c8cb407482c6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -424,7 +424,7 @@ err: int parse_num_list(const char *s, struct test_selector *sel) { - int i, set_len = 0, num, start = 0, end = -1; + int i, set_len = 0, new_len, num, start = 0, end = -1; bool *set = NULL, *tmp, parsing_end = false; char *next; @@ -459,18 +459,19 @@ int parse_num_list(const char *s, struct test_selector *sel) return -EINVAL; if (end + 1 > set_len) { - set_len = end + 1; - tmp = realloc(set, set_len); + new_len = end + 1; + tmp = realloc(set, new_len); if (!tmp) { free(set); return -ENOMEM; } + for (i = set_len; i < start; i++) + tmp[i] = false; set = tmp; + set_len = new_len; } - for (i = start; i <= end; i++) { + for (i = start; i <= end; i++) set[i] = true; - } - } if (!set) -- cgit v1.2.3 From fd27b1835e7079d26809cd4d33ba98f84e593766 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Mar 2020 18:39:32 -0700 Subject: selftests/bpf: Reset process and thread affinity after each test/sub-test Some tests and sub-tests are setting "custom" thread/process affinity and don't reset it back. Instead of requiring each test to undo all this, ensure that thread affinity is restored by test_progs test runner itself. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200314013932.4035712-3-andriin@fb.com --- tools/testing/selftests/bpf/test_progs.c | 42 +++++++++++++++++++++++++++++++- tools/testing/selftests/bpf/test_progs.h | 1 + 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c8cb407482c6..b521e0a512b6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Facebook */ +#define _GNU_SOURCE #include "test_progs.h" #include "cgroup_helpers.h" #include "bpf_rlimit.h" #include -#include +#include +#include #include +#include #include /* backtrace */ /* defined in test_progs.h */ @@ -90,6 +93,34 @@ static void skip_account(void) } } +static void stdio_restore(void); + +/* A bunch of tests set custom affinity per-thread and/or per-process. Reset + * it after each test/sub-test. + */ +static void reset_affinity() { + + cpu_set_t cpuset; + int i, err; + + CPU_ZERO(&cpuset); + for (i = 0; i < env.nr_cpus; i++) + CPU_SET(i, &cpuset); + + err = sched_setaffinity(0, sizeof(cpuset), &cpuset); + if (err < 0) { + stdio_restore(); + fprintf(stderr, "Failed to reset process affinity: %d!\n", err); + exit(-1); + } + err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + if (err < 0) { + stdio_restore(); + fprintf(stderr, "Failed to reset thread affinity: %d!\n", err); + exit(-1); + } +} + void test__end_subtest() { struct prog_test_def *test = env.test; @@ -107,6 +138,8 @@ void test__end_subtest() test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); + reset_affinity(); + free(test->subtest_name); test->subtest_name = NULL; } @@ -679,6 +712,12 @@ int main(int argc, char **argv) srand(time(NULL)); env.jit_enabled = is_jit_enabled(); + env.nr_cpus = libbpf_num_possible_cpus(); + if (env.nr_cpus < 0) { + fprintf(stderr, "Failed to get number of CPUs: %d!\n", + env.nr_cpus); + return -1; + } stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { @@ -709,6 +748,7 @@ int main(int argc, char **argv) test->test_num, test->test_name, test->error_cnt ? "FAIL" : "OK"); + reset_affinity(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index fd85fa61dbf7..f4aff6b8284b 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -71,6 +71,7 @@ struct test_env { FILE *stderr; char *log_buf; size_t log_cnt; + int nr_cpus; int succ_cnt; /* successful tests */ int sub_succ_cnt; /* successful sub-tests */ -- cgit v1.2.3 From 483d7a30f538e2f8addd32aa9a3d2e94ae55fa65 Mon Sep 17 00:00:00 2001 From: Wenbo Zhang Date: Sun, 15 Mar 2020 04:32:52 -0400 Subject: bpf, libbpf: Fix ___bpf_kretprobe_args1(x) macro definition Use PT_REGS_RC instead of PT_REGS_RET to get ret correctly. Fixes: df8ff35311c8 ("libbpf: Merge selftests' bpf_trace_helpers.h into libbpf's bpf_tracing.h") Signed-off-by: Wenbo Zhang Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200315083252.22274-1-ethercflow@gmail.com --- tools/lib/bpf/bpf_tracing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index b0c9ae5c73b5..f3f3c3fb98cb 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -390,7 +390,7 @@ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx #define ___bpf_kretprobe_args1(x) \ - ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) + ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) #define ___bpf_kretprobe_args(args...) \ ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) -- cgit v1.2.3 From 3b7a15b0643d42e4dca78c5aed8f1ad209a3d1ab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 12 Mar 2020 22:31:29 -0700 Subject: perf tools: Give synthetic mmap events an inode generation When mmap2 events are synthesized the ino_generation field isn't being set leading to uninitialized memory being compared. Caught with clang's -fsanitize=memory: ==124733==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x55a96a6a65cc in __dso_id__cmp tools/perf/util/dsos.c:23:6 #1 0x55a96a6a81d5 in dso_id__cmp tools/perf/util/dsos.c:38:9 #2 0x55a96a6a717f in __dso__cmp_long_name tools/perf/util/dsos.c:74:15 #3 0x55a96a6a6c4c in __dsos__findnew_link_by_longname_id tools/perf/util/dsos.c:106:12 #4 0x55a96a6a851e in __dsos__findnew_by_longname_id tools/perf/util/dsos.c:178:9 #5 0x55a96a6a7798 in __dsos__find_id tools/perf/util/dsos.c:191:9 #6 0x55a96a6a7b57 in __dsos__findnew_id tools/perf/util/dsos.c:251:20 #7 0x55a96a6a7a57 in dsos__findnew_id tools/perf/util/dsos.c:259:17 #8 0x55a96a7776ae in machine__findnew_dso_id tools/perf/util/machine.c:2709:9 #9 0x55a96a77dfcf in map__new tools/perf/util/map.c:193:10 #10 0x55a96a77240a in machine__process_mmap2_event tools/perf/util/machine.c:1670:8 #11 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #12 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #13 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #14 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #15 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #16 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #17 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #18 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #19 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #20 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #21 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #22 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #23 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #24 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #25 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #26 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #27 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #28 0x55a96a282223 in main tools/perf/perf.c:538:3 Uninitialized value was stored to memory at #1 0x55a96a6a18f7 in dso__new_id tools/perf/util/dso.c:1230:14 #2 0x55a96a6a78ee in __dsos__addnew_id tools/perf/util/dsos.c:233:20 #3 0x55a96a6a7bcc in __dsos__findnew_id tools/perf/util/dsos.c:252:21 #4 0x55a96a6a7a57 in dsos__findnew_id tools/perf/util/dsos.c:259:17 #5 0x55a96a7776ae in machine__findnew_dso_id tools/perf/util/machine.c:2709:9 #6 0x55a96a77dfcf in map__new tools/perf/util/map.c:193:10 #7 0x55a96a77240a in machine__process_mmap2_event tools/perf/util/machine.c:1670:8 #8 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #9 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #10 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #11 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #12 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #13 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #14 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #15 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #16 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #17 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #18 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #19 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 Uninitialized value was stored to memory at #0 0x55a96a7725af in machine__process_mmap2_event tools/perf/util/machine.c:1646:25 #1 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #2 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #3 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #4 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #5 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #6 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #7 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #8 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #9 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #10 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #11 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #12 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #13 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #14 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #15 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #16 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #17 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #18 0x55a96a282223 in main tools/perf/perf.c:538:3 Uninitialized value was created by a heap allocation #0 0x55a96a22f60d in malloc llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:925:3 #1 0x55a96a882948 in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:655:15 #2 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #3 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #4 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #5 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #6 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #7 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #8 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #9 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #10 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #11 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #12 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #13 0x55a96a282223 in main tools/perf/perf.c:538:3 SUMMARY: MemorySanitizer: use-of-uninitialized-value tools/perf/util/dsos.c:23:6 in __dso_id__cmp Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200313053129.131264-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index dd3e6f43fb86..3f28af39f9c6 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -345,6 +345,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, continue; event->mmap2.ino = (u64)ino; + event->mmap2.ino_generation = 0; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c -- cgit v1.2.3 From c3b10649a80e9da2892c1fd3038c53abd57588f6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 13 Mar 2020 21:46:07 +0800 Subject: perf report: Fix no branch type statistics report issue Previously we could get the report of branch type statistics. For example: # perf record -j any,save_type ... # t perf report --stdio # # Branch Statistics: # COND_FWD: 40.6% COND_BWD: 4.1% CROSS_4K: 24.7% CROSS_2M: 12.3% COND: 44.7% UNCOND: 0.0% IND: 6.1% CALL: 24.5% RET: 24.7% But now for the recent perf, it can't report the branch type statistics. It's a regression issue caused by commit 40c39e304641 ("perf report: Fix a no annotate browser displayed issue"), which only counts the branch type statistics for browser mode. This patch moves the branch_type_count() outside of ui__has_annotation() checking, then branch type statistics can work for stdio mode. Fixes: 40c39e304641 ("perf report: Fix a no annotate browser displayed issue") Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200313134607.12873-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d7c905f7520f..5f4045df76f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -186,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, { struct hist_entry *he = iter->he; struct report *rep = arg; - struct branch_info *bi; + struct branch_info *bi = he->branch_info; struct perf_sample *sample = iter->sample; struct evsel *evsel = iter->evsel; int err; + branch_type_count(&rep->brtype_stat, &bi->flags, + bi->from.addr, bi->to.addr); + if (!ui__has_annotation() && !rep->symbol_ipc) return 0; - bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); - branch_type_count(&rep->brtype_stat, &bi->flags, - bi->from.addr, bi->to.addr); - out: return err; } -- cgit v1.2.3 From 59a08b4b3b1a9374adacd13cd7544c03e5582e0e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Mar 2020 16:56:09 +0100 Subject: perf expr: Fix copy/paste mistake Copy/paste leftover from recent refactor. Fixes: 26226a97724d ("perf expr: Move expr lexer to flex") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200315155609.603948-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 1928f2a3dddc..eaad29243c23 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -79,10 +79,10 @@ symbol {spec}*{sym}*{spec}*{sym}* { int start_token; - start_token = parse_events_get_extra(yyscanner); + start_token = expr_get_extra(yyscanner); if (start_token) { - parse_events_set_extra(NULL, yyscanner); + expr_set_extra(NULL, yyscanner); return start_token; } } -- cgit v1.2.3 From bfcaa84975fa0c75deca3e997533aaa35ffed12b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 16 Mar 2020 18:37:03 +0100 Subject: KVM: selftests: Rework timespec functions and usage The steal_time test's timespec stop condition was wrong and should have used the timespec functions instead to avoid being wrong, but timespec_diff had a strange interface. Rework all the timespec API and its use. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 37 ++++++++++-------------- tools/testing/selftests/kvm/include/test_util.h | 3 +- tools/testing/selftests/kvm/lib/test_util.c | 37 ++++++++++-------------- tools/testing/selftests/kvm/steal_time.c | 2 +- 4 files changed, 35 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index d82f7bc060c3..360cd3ea4cd6 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -117,8 +117,7 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = args->vm; int vcpu_id = args->vcpu_id; struct kvm_run *run; - struct timespec start; - struct timespec end; + struct timespec start, end, ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -135,9 +134,9 @@ static void *vcpu_worker(void *data) } clock_gettime(CLOCK_MONOTONIC, &end); - PER_VCPU_DEBUG("vCPU %d execution time: %lld.%.9lds\n", vcpu_id, - (long long)(timespec_diff(start, end).tv_sec), - timespec_diff(start, end).tv_nsec); + ts_diff = timespec_sub(end, start); + PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, + ts_diff.tv_sec, ts_diff.tv_nsec); return NULL; } @@ -201,8 +200,8 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) clock_gettime(CLOCK_MONOTONIC, &end); - PER_PAGE_DEBUG("UFFDIO_COPY %d \t%lld ns\n", tid, - (long long)timespec_to_ns(timespec_diff(start, end))); + PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, + timespec_to_ns(timespec_sub(end, start))); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", host_page_size, addr, tid); @@ -224,8 +223,7 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start; - struct timespec end; + struct timespec start, end, ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -295,11 +293,10 @@ static void *uffd_handler_thread_fn(void *arg) } clock_gettime(CLOCK_MONOTONIC, &end); - PER_VCPU_DEBUG("userfaulted %ld pages over %lld.%.9lds. (%f/sec)\n", - pages, (long long)(timespec_diff(start, end).tv_sec), - timespec_diff(start, end).tv_nsec, pages / - ((double)timespec_diff(start, end).tv_sec + - (double)timespec_diff(start, end).tv_nsec / 100000000.0)); + ts_diff = timespec_sub(end, start); + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", + pages, ts_diff.tv_sec, ts_diff.tv_nsec, + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); return NULL; } @@ -360,13 +357,12 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; + struct timespec start, end, ts_diff; int *pipefds = NULL; struct kvm_vm *vm; uint64_t guest_num_pages; int vcpu_id; int r; - struct timespec start; - struct timespec end; vm = create_vm(mode, vcpus, vcpu_memory_bytes); @@ -514,12 +510,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - pr_info("Total guest execution time: %lld.%.9lds\n", - (long long)(timespec_diff(start, end).tv_sec), - timespec_diff(start, end).tv_nsec); + ts_diff = timespec_sub(end, start); + pr_info("Total guest execution time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)timespec_diff(start, end).tv_sec + - (double)timespec_diff(start, end).tv_nsec / 100000000.0)); + guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index f588ad1403f1..5eb01bf51b86 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -61,7 +61,8 @@ void test_assert(bool exp, const char *exp_str, size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); -struct timespec timespec_diff(struct timespec start, struct timespec end); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); +struct timespec timespec_add(struct timespec ts1, struct timespec ts2); +struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index ee12c4b9ae05..689e97c27ee2 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -56,36 +56,31 @@ int64_t timespec_to_ns(struct timespec ts) return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec; } -struct timespec timespec_diff(struct timespec start, struct timespec end) -{ - struct timespec temp; - - if ((end.tv_nsec - start.tv_nsec) < 0) { - temp.tv_sec = end.tv_sec - start.tv_sec - 1; - temp.tv_nsec = 1000000000LL + end.tv_nsec - start.tv_nsec; - } else { - temp.tv_sec = end.tv_sec - start.tv_sec; - temp.tv_nsec = end.tv_nsec - start.tv_nsec; - } - - return temp; -} - struct timespec timespec_add_ns(struct timespec ts, int64_t ns) { struct timespec res; - res.tv_sec = ts.tv_sec; res.tv_nsec = ts.tv_nsec + ns; - - if (res.tv_nsec > 1000000000UL) { - res.tv_sec += 1; - res.tv_nsec -= 1000000000UL; - } + res.tv_sec = ts.tv_sec + res.tv_nsec / 1000000000LL; + res.tv_nsec %= 1000000000LL; return res; } +struct timespec timespec_add(struct timespec ts1, struct timespec ts2) +{ + int64_t ns1 = timespec_to_ns(ts1); + int64_t ns2 = timespec_to_ns(ts2); + return timespec_add_ns((struct timespec){0}, ns1 + ns2); +} + +struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) +{ + int64_t ns1 = timespec_to_ns(ts1); + int64_t ns2 = timespec_to_ns(ts2); + return timespec_add_ns((struct timespec){0}, ns1 - ns2); +} + void print_skip(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index f976ac5e896a..fcc840088c91 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -242,7 +242,7 @@ static void *do_steal_time(void *arg) while (1) { clock_gettime(CLOCK_MONOTONIC, &ts); - if (ts.tv_sec > stop.tv_sec || ts.tv_nsec >= stop.tv_nsec) + if (timespec_to_ns(timespec_sub(ts, stop)) >= 0) break; } -- cgit v1.2.3 From 4d21ed2e3d6a7ca24afc531f693ebefa2549218d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Mar 2020 15:48:50 +0200 Subject: selftests: spectrum-2: Adjust tc_flower_scale limit according to current counter count With the change that made the code to query counter bank size from device instead of using hard-coded value, the number of available counters changed for Spectrum-2. Adjust the limit in the selftests. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index a0795227216e..efd798a85931 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -8,9 +8,9 @@ tc_flower_get_target() # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 12K (12,288) flow counters and six of + # Currently, the driver supports 30K (30,720) flow counters and six of # these are used for multicast routing. - local target=12282 + local target=30714 if ((! should_fail)); then echo $target -- cgit v1.2.3 From ee4848ac1a8a6c1c167a399efe4114f717b3e921 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Mar 2020 15:48:57 +0200 Subject: selftests: mlxsw: Add tc action hw_stats tests Add tests for mlxsw hw_stats types. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/tc_action_hw_stats.sh | 130 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 9 ++ 2 files changed, 139 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh new file mode 100755 index 000000000000..20ed98fe5a60 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + default_hw_stats_test + immediate_hw_stats_test + delayed_hw_stats_test + disabled_hw_stats_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +hw_stats_test() +{ + RET=0 + + local name=$1 + local action_hw_stats=$2 + local occ_delta=$3 + local expected_packet_count=$4 + + local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats + check_err $? "Failed to add rule with $name hw_stats" + + local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + local expected_occ=$((orig_occ + occ_delta)) + [ "$new_occ" == "$expected_occ" ] + check_err $? "Expected occupancy of $expected_occ, got $new_occ" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count + check_err $? "Did not match incoming packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "$name hw_stats" +} + +default_hw_stats_test() +{ + hw_stats_test "default" "" 2 1 +} + +immediate_hw_stats_test() +{ + hw_stats_test "immediate" "hw_stats immediate" 2 1 +} + +delayed_hw_stats_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed + check_fail $? "Unexpected success in adding rule with delayed hw_stats" + + log_test "delayed hw_stats" +} + +disabled_hw_stats_test() +{ + hw_stats_test "disabled" "hw_stats disabled" 0 0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + swp1mac=$(mac_get $swp1) + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +check_tc_action_hw_stats_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index a4a7879b3bb9..977fc2b326a2 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -60,6 +60,15 @@ check_tc_chain_support() fi } +check_tc_action_hw_stats_support() +{ + tc actions help 2>&1 | grep -q hw_stats + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing action hw_stats support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 -- cgit v1.2.3 From 166391159c5deb84795d2ff46e95f276177fa5fb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 18 Mar 2020 18:30:43 -0600 Subject: wireguard: selftests: remove duplicated include This commit removes a duplicated include. Signed-off-by: YueHaibing Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index 90bc9813cadc..c9698120ac9d 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 551599edbfff2431cef943a772fbde1c3e26eaf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:44 -0600 Subject: wireguard: selftests: test using new 64-bit time_t In case this helps expose bugs with the newer 64-bit time_t types, we do our testing with the newer musl that supports this as well as CONFIG_COMPAT_32BIT_TIME=n. This matters to us, since wireguard does in fact deal with timestamps. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/Makefile | 2 +- tools/testing/selftests/wireguard/qemu/kernel.config | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 28d477683e8a..90598a425c18 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -41,7 +41,7 @@ $(DISTFILES_PATH)/$(1): flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) +$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index af9323a0b6e0..d531de13c95b 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -56,7 +56,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_HZ_PERIODIC=n CONFIG_HIGH_RES_TIMERS=y -CONFIG_COMPAT_32BIT_TIME=y CONFIG_ARCH_RANDOM=y CONFIG_FILE_LOCKING=y CONFIG_POSIX_TIMERS=y -- cgit v1.2.3 From 11a7686aa99c7fe4b3f80f6dcccd54129817984d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:47 -0600 Subject: wireguard: noise: error out precomputed DH during handshake rather than config We precompute the static-static ECDH during configuration time, in order to save an expensive computation later when receiving network packets. However, not all ECDH computations yield a contributory result. Prior, we were just not letting those peers be added to the interface. However, this creates a strange inconsistency, since it was still possible to add other weird points, like a valid public key plus a low-order point, and, like points that result in zeros, a handshake would not complete. In order to make the behavior more uniform and less surprising, simply allow all peers to be added. Then, we'll error out later when doing the crypto if there's an issue. This also adds more separation between the crypto layer and the configuration layer. Discussed-with: Mathias Hall-Andersen Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/netlink.c | 8 ++--- drivers/net/wireguard/noise.c | 55 ++++++++++++++++-------------- drivers/net/wireguard/noise.h | 12 +++---- drivers/net/wireguard/peer.c | 7 ++-- tools/testing/selftests/wireguard/netns.sh | 15 +++++--- 5 files changed, 49 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index bda26405497c..802099c8828a 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -411,11 +411,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) peer = wg_peer_create(wg, public_key, preshared_key); if (IS_ERR(peer)) { - /* Similar to the above, if the key is invalid, we skip - * it without fanfare, so that services don't need to - * worry about doing key validation themselves. - */ - ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + ret = PTR_ERR(peer); peer = NULL; goto out; } @@ -569,7 +565,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) private_key); list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { - BUG_ON(!wg_noise_precompute_static_static(peer)); + wg_noise_precompute_static_static(peer); wg_noise_expire_current_peer_keypairs(peer); } wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 919d9d866446..708dc61c974f 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -44,32 +44,23 @@ void __init wg_noise_init(void) } /* Must hold peer->handshake.static_identity->lock */ -bool wg_noise_precompute_static_static(struct wg_peer *peer) +void wg_noise_precompute_static_static(struct wg_peer *peer) { - bool ret; - down_write(&peer->handshake.lock); - if (peer->handshake.static_identity->has_identity) { - ret = curve25519( - peer->handshake.precomputed_static_static, + if (!peer->handshake.static_identity->has_identity || + !curve25519(peer->handshake.precomputed_static_static, peer->handshake.static_identity->static_private, - peer->handshake.remote_static); - } else { - u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; - - ret = curve25519(empty, empty, peer->handshake.remote_static); + peer->handshake.remote_static)) memset(peer->handshake.precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN); - } up_write(&peer->handshake.lock); - return ret; } -bool wg_noise_handshake_init(struct noise_handshake *handshake, - struct noise_static_identity *static_identity, - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], - struct wg_peer *peer) +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer) { memset(handshake, 0, sizeof(*handshake)); init_rwsem(&handshake->lock); @@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake, NOISE_SYMMETRIC_KEY_LEN); handshake->static_identity = static_identity; handshake->state = HANDSHAKE_ZEROED; - return wg_noise_precompute_static_static(peer); + wg_noise_precompute_static_static(peer); } static void handshake_zero(struct noise_handshake *handshake) @@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], return true; } +static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) +{ + static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; + if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) + return false; + kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + return true; +} + static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) { struct blake2s_state blake; @@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, NOISE_PUBLIC_KEY_LEN, key, handshake->hash); /* ss */ - kdf(handshake->chaining_key, key, NULL, - handshake->precomputed_static_static, NOISE_HASH_LEN, - NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, - handshake->chaining_key); + if (!mix_precomputed_dh(handshake->chaining_key, key, + handshake->precomputed_static_static)) + goto out; /* {t} */ tai64n_now(timestamp); @@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, handshake = &peer->handshake; /* ss */ - kdf(chaining_key, key, NULL, handshake->precomputed_static_static, - NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, - chaining_key); + if (!mix_precomputed_dh(chaining_key, key, + handshake->precomputed_static_static)) + goto out; /* {t} */ if (!message_decrypt(t, src->encrypted_timestamp, diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h index 138a07bb817c..f532d59d3f19 100644 --- a/drivers/net/wireguard/noise.h +++ b/drivers/net/wireguard/noise.h @@ -94,11 +94,11 @@ struct noise_handshake { struct wg_device; void wg_noise_init(void); -bool wg_noise_handshake_init(struct noise_handshake *handshake, - struct noise_static_identity *static_identity, - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], - struct wg_peer *peer); +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); void wg_noise_handshake_clear(struct noise_handshake *handshake); static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) { @@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); void wg_noise_set_static_identity_private_key( struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]); -bool wg_noise_precompute_static_static(struct wg_peer *peer); +void wg_noise_precompute_static_static(struct wg_peer *peer); bool wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c index 071eedf33f5a..1d634bd3038f 100644 --- a/drivers/net/wireguard/peer.c +++ b/drivers/net/wireguard/peer.c @@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, return ERR_PTR(ret); peer->device = wg; - if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, - public_key, preshared_key, peer)) { - ret = -EKEYREJECTED; - goto err_1; - } + wg_noise_handshake_init(&peer->handshake, &wg->static_identity, + public_key, preshared_key, peer); if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) goto err_1; if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 138d46b3f330..936e1ca9410e 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 n0 wg set wg0 peer "$pub2" allowed-ips ::/0 n0 wg set wg0 peer "$pub2" remove -low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) -n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } -[[ -z $(n0 wg show wg0 peers) ]] -n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } -[[ -z $(n0 wg show wg0 peers) ]] +for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do + n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111 +done +[[ -n $(n0 wg show wg0 peers) ]] +exec 4< <(n0 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns0 $ncat_pid +ip0 link set wg0 up +! read -r -n 1 -t 2 <&4 || false +kill $ncat_pid ip0 link del wg0 declare -A objects -- cgit v1.2.3 From 6e29225af902e46c97c1e5b6f3dd8d86490593a8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Mar 2020 16:24:18 +0100 Subject: binderfs: port tests to test harness infrastructure Makes for nicer output and prepares for additional tests. Signed-off-by: Christian Brauner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200313152420.138777-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman --- .../selftests/filesystems/binderfs/Makefile | 2 ++ .../selftests/filesystems/binderfs/binderfs_test.c | 31 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile index 58cb659b56b4..75315d9ba7a9 100644 --- a/tools/testing/selftests/filesystems/binderfs/Makefile +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -3,4 +3,6 @@ CFLAGS += -I../../../../../usr/include/ TEST_GEN_PROGS := binderfs_test +binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h + include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 8c2ed962e1c7..0cfca65e095a 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -15,7 +15,9 @@ #include #include #include + #include "../../kselftest.h" +#include "../../kselftest_harness.h" static ssize_t write_nointr(int fd, const void *buf, size_t count) { @@ -132,7 +134,7 @@ static void rmdir_protect_errno(const char *dir) errno = saved_errno; } -static void __do_binderfs_test(void) +static int __do_binderfs_test(void) { int fd, ret, saved_errno; size_t len; @@ -160,8 +162,7 @@ static void __do_binderfs_test(void) strerror(errno)); keep ? : rmdir_protect_errno("/dev/binderfs"); - ksft_exit_skip( - "The Android binderfs filesystem is not available\n"); + return 1; } /* binderfs mount test passed */ @@ -250,26 +251,24 @@ on_error: /* binderfs unmount test passed */ ksft_inc_pass_cnt(); + return 0; } -static void binderfs_test_privileged() +TEST(binderfs_test_privileged) { if (geteuid() != 0) - ksft_print_msg( - "Tests are not run as root. Skipping privileged tests\n"); - else - __do_binderfs_test(); + XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + + if (__do_binderfs_test() == 1) + XFAIL(return, "The Android binderfs filesystem is not available"); } -static void binderfs_test_unprivileged() +TEST(binderfs_test_unprivileged) { change_to_userns(); - __do_binderfs_test(); -} -int main(int argc, char *argv[]) -{ - binderfs_test_privileged(); - binderfs_test_unprivileged(); - ksft_exit_pass(); + if (__do_binderfs_test() == 1) + XFAIL(return, "The Android binderfs filesystem is not available"); } + +TEST_HARNESS_MAIN -- cgit v1.2.3 From ad29ace2e26e88e14b1829c7cef89ac0ab06f08a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Mar 2020 16:24:19 +0100 Subject: binderfs_test: switch from /dev to a unique per-test mountpoint Unprivileged users will be able to create directories in there. The unprivileged test for /dev wouldn't have worked on most systems. Signed-off-by: Christian Brauner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200313152420.138777-2-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman --- .../selftests/filesystems/binderfs/binderfs_test.c | 47 +++++++++++----------- 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 0cfca65e095a..818eb49f8125 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -139,29 +139,25 @@ static int __do_binderfs_test(void) int fd, ret, saved_errno; size_t len; ssize_t wret; - bool keep = false; struct binderfs_device device = { 0 }; struct binder_version version = { 0 }; + char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", + device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; change_to_mountns(); - ret = mkdir("/dev/binderfs", 0755); - if (ret < 0) { - if (errno != EEXIST) - ksft_exit_fail_msg( - "%s - Failed to create binderfs mountpoint\n", - strerror(errno)); - - keep = true; - } + if (!mkdtemp(binderfs_mntpt)) + ksft_exit_fail_msg( + "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); - ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); if (ret < 0) { if (errno != ENODEV) ksft_exit_fail_msg("%s - Failed to mount binderfs\n", strerror(errno)); - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); return 1; } @@ -170,7 +166,8 @@ static int __do_binderfs_test(void) memcpy(device.name, "my-binder", strlen("my-binder")); - fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt); + fd = open(device_path, O_RDONLY | O_CLOEXEC); if (fd < 0) ksft_exit_fail_msg( "%s - Failed to open binder-control device\n", @@ -181,7 +178,7 @@ static int __do_binderfs_test(void) close(fd); errno = saved_errno; if (ret < 0) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg( "%s - Failed to allocate new binder device\n", strerror(errno)); @@ -194,9 +191,10 @@ static int __do_binderfs_test(void) /* binder device allocation test passed */ ksft_inc_pass_cnt(); - fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY); + snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt); + fd = open(device_path, O_CLOEXEC | O_RDONLY); if (fd < 0) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg("%s - Failed to open my-binder device\n", strerror(errno)); } @@ -206,7 +204,7 @@ static int __do_binderfs_test(void) close(fd); errno = saved_errno; if (ret < 0) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg( "%s - Failed to open perform BINDER_VERSION request\n", strerror(errno)); @@ -218,9 +216,9 @@ static int __do_binderfs_test(void) /* binder transaction with binderfs binder device passed */ ksft_inc_pass_cnt(); - ret = unlink("/dev/binderfs/my-binder"); + ret = unlink(device_path); if (ret < 0) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg("%s - Failed to delete binder device\n", strerror(errno)); } @@ -228,12 +226,13 @@ static int __do_binderfs_test(void) /* binder device removal passed */ ksft_inc_pass_cnt(); - ret = unlink("/dev/binderfs/binder-control"); + snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt); + ret = unlink(device_path); if (!ret) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg("Managed to delete binder-control device\n"); } else if (errno != EPERM) { - keep ? : rmdir_protect_errno("/dev/binderfs"); + rmdir_protect_errno(binderfs_mntpt); ksft_exit_fail_msg( "%s - Failed to delete binder-control device but exited with unexpected error code\n", strerror(errno)); @@ -243,8 +242,8 @@ static int __do_binderfs_test(void) ksft_inc_xfail_cnt(); on_error: - ret = umount2("/dev/binderfs", MNT_DETACH); - keep ?: rmdir_protect_errno("/dev/binderfs"); + ret = umount2(binderfs_mntpt, MNT_DETACH); + rmdir_protect_errno(binderfs_mntpt); if (ret < 0) ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", strerror(errno)); -- cgit v1.2.3 From e48d117436082217a22bee58760f101ae1f48fee Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Mar 2020 16:24:20 +0100 Subject: binderfs: add stress test for binderfs binder devices This adds a stress test that should hopefully help us catch regressions for [1], [2], and [3]. [1]: 2669b8b0c798 ("binder: prevent UAF for binderfs devices") [2]: f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") [3]: 211b64e4b5b6 ("binderfs: use refcount for binder control devices too") Signed-off-by: Christian Brauner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200313152420.138777-3-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman --- .../selftests/filesystems/binderfs/Makefile | 2 +- .../selftests/filesystems/binderfs/binderfs_test.c | 426 ++++++++++++++++----- 2 files changed, 334 insertions(+), 94 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile index 75315d9ba7a9..8af25ae96049 100644 --- a/tools/testing/selftests/filesystems/binderfs/Makefile +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include/ +CFLAGS += -I../../../../../usr/include/ -pthread TEST_GEN_PROGS := binderfs_test binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 818eb49f8125..8a6b507e34a8 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -3,15 +3,20 @@ #define _GNU_SOURCE #include #include +#include #include #include #include #include #include +#include #include #include +#include #include +#include #include +#include #include #include #include @@ -19,100 +24,26 @@ #include "../../kselftest.h" #include "../../kselftest_harness.h" -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; -again: - ret = write(fd, buf, count); - if (ret < 0 && errno == EINTR) - goto again; - - return ret; -} - -static void write_to_file(const char *filename, const void *buf, size_t count, - int allowed_errno) -{ - int fd, saved_errno; - ssize_t ret; - - fd = open(filename, O_WRONLY | O_CLOEXEC); - if (fd < 0) - ksft_exit_fail_msg("%s - Failed to open file %s\n", - strerror(errno), filename); +#define DEFAULT_THREADS 4 - ret = write_nointr(fd, buf, count); - if (ret < 0) { - if (allowed_errno && (errno == allowed_errno)) { - close(fd); - return; - } +#define PTR_TO_INT(p) ((int)((intptr_t)(p))) +#define INT_TO_PTR(u) ((void *)((intptr_t)(u))) - goto on_error; +#define close_prot_errno_disarm(fd) \ + if (fd >= 0) { \ + int _e_ = errno; \ + close(fd); \ + errno = _e_; \ + fd = -EBADF; \ } - if ((size_t)ret != count) - goto on_error; - - close(fd); - return; - -on_error: - saved_errno = errno; - close(fd); - errno = saved_errno; - - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to write to file %s\n", - strerror(errno), filename); - - ksft_exit_fail_msg("Failed to write to file %s\n", filename); -} - -static void change_to_userns(void) -{ - int ret; - uid_t uid; - gid_t gid; - /* {g,u}id_map files only allow a max of 4096 bytes written to them */ - char idmap[4096]; - - uid = getuid(); - gid = getgid(); - - ret = unshare(CLONE_NEWUSER); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", - strerror(errno)); - - write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT); - - ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid); - if (ret < 0 || (size_t)ret >= sizeof(idmap)) - ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", - strerror(errno)); - - write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0); - - ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid); - if (ret < 0 || (size_t)ret >= sizeof(idmap)) - ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", - strerror(errno)); - - write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0); - - ret = setgid(0); - if (ret) - ksft_exit_fail_msg("%s - Failed to setgid(0)\n", - strerror(errno)); +#define log_exit(format, ...) \ + ({ \ + fprintf(stderr, format "\n", ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + }) - ret = setuid(0); - if (ret) - ksft_exit_fail_msg("%s - Failed to setgid(0)\n", - strerror(errno)); -} - -static void change_to_mountns(void) +static void change_mountns(void) { int ret; @@ -144,7 +75,7 @@ static int __do_binderfs_test(void) char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; - change_to_mountns(); + change_mountns(); if (!mkdtemp(binderfs_mntpt)) ksft_exit_fail_msg( @@ -253,6 +184,288 @@ on_error: return 0; } +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static int setid_userns_root(void) +{ + if (setuid(0)) + return -1; + if (setgid(0)) + return -1; + + setfsuid(0); + setfsgid(0); + + return 0; +} + +enum idmap_type { + UID_MAP, + GID_MAP, +}; + +static ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; +again: + ret = read(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf, + size_t buf_size) +{ + int fd; + int ret; + char path[4096]; + + if (type == GID_MAP) { + int setgroups_fd; + + snprintf(path, sizeof(path), "/proc/%d/setgroups", pid); + setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW); + if (setgroups_fd < 0 && errno != ENOENT) + return -1; + + if (setgroups_fd >= 0) { + ret = write_nointr(setgroups_fd, "deny", sizeof("deny") - 1); + close_prot_errno_disarm(setgroups_fd); + if (ret != sizeof("deny") - 1) + return -1; + } + } + + switch (type) { + case UID_MAP: + ret = snprintf(path, sizeof(path), "/proc/%d/uid_map", pid); + break; + case GID_MAP: + ret = snprintf(path, sizeof(path), "/proc/%d/gid_map", pid); + break; + default: + return -1; + } + if (ret < 0 || ret >= sizeof(path)) + return -E2BIG; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, buf_size); + close_prot_errno_disarm(fd); + if (ret != buf_size) + return -1; + + return 0; +} + +static void change_userns(int syncfds[2]) +{ + int ret; + char buf; + + close_prot_errno_disarm(syncfds[1]); + + ret = unshare(CLONE_NEWUSER); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", + strerror(errno)); + + ret = write_nointr(syncfds[0], "1", 1); + if (ret != 1) + ksft_exit_fail_msg("write_nointr() failed\n"); + + ret = read_nointr(syncfds[0], &buf, 1); + if (ret != 1) + ksft_exit_fail_msg("read_nointr() failed\n"); + + close_prot_errno_disarm(syncfds[0]); + + if (setid_userns_root()) + ksft_exit_fail_msg("setid_userns_root() failed"); +} + +static void change_idmaps(int syncfds[2], pid_t pid) +{ + int ret; + char buf; + char id_map[4096]; + + close_prot_errno_disarm(syncfds[0]); + + ret = read_nointr(syncfds[1], &buf, 1); + if (ret != 1) + ksft_exit_fail_msg("read_nointr() failed\n"); + + snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid()); + ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map)); + if (ret) + ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed"); + + snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid()); + ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map)); + if (ret) + ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed"); + + ret = write_nointr(syncfds[1], "1", 1); + if (ret != 1) + ksft_exit_fail_msg("write_nointr() failed"); + + close_prot_errno_disarm(syncfds[1]); +} + +static void *binder_version_thread(void *data) +{ + int fd = PTR_TO_INT(data); + struct binder_version version = { 0 }; + int ret; + + ret = ioctl(fd, BINDER_VERSION, &version); + if (ret < 0) + ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno)); + + pthread_exit(data); +} + +/* + * Regression test: + * 2669b8b0c798 ("binder: prevent UAF for binderfs devices") + * f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") + * 211b64e4b5b6 ("binderfs: use refcount for binder control devices too") + */ +TEST(binderfs_stress) +{ + int fds[1000]; + int syncfds[2]; + pid_t pid; + int fd, ret; + size_t len; + struct binderfs_device device = { 0 }; + char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", + device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; + + ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno)); + + pid = fork(); + if (pid < 0) { + close_prot_errno_disarm(syncfds[0]); + close_prot_errno_disarm(syncfds[1]); + ksft_exit_fail_msg("%s - Failed to fork", strerror(errno)); + } + + if (pid == 0) { + int i, j, k, nthreads; + pthread_attr_t attr; + pthread_t threads[DEFAULT_THREADS]; + change_userns(syncfds); + change_mountns(); + + if (!mkdtemp(binderfs_mntpt)) + log_exit("%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + + ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); + if (ret < 0) + log_exit("%s - Failed to mount binderfs\n", strerror(errno)); + + for (int i = 0; i < ARRAY_SIZE(fds); i++) { + + snprintf(device_path, sizeof(device_path), + "%s/binder-control", binderfs_mntpt); + fd = open(device_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + log_exit("%s - Failed to open binder-control device\n", strerror(errno)); + + memset(&device, 0, sizeof(device)); + snprintf(device.name, sizeof(device.name), "%d", i); + ret = ioctl(fd, BINDER_CTL_ADD, &device); + close_prot_errno_disarm(fd); + if (ret < 0) + log_exit("%s - Failed to allocate new binder device\n", strerror(errno)); + + snprintf(device_path, sizeof(device_path), "%s/%d", + binderfs_mntpt, i); + fds[i] = open(device_path, O_RDONLY | O_CLOEXEC); + if (fds[i] < 0) + log_exit("%s - Failed to open binder device\n", strerror(errno)); + } + + ret = umount2(binderfs_mntpt, MNT_DETACH); + rmdir_protect_errno(binderfs_mntpt); + if (ret < 0) + log_exit("%s - Failed to unmount binderfs\n", strerror(errno)); + + nthreads = get_nprocs_conf(); + if (nthreads > DEFAULT_THREADS) + nthreads = DEFAULT_THREADS; + + pthread_attr_init(&attr); + for (k = 0; k < ARRAY_SIZE(fds); k++) { + for (i = 0; i < nthreads; i++) { + ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k])); + if (ret) { + ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i); + break; + } + } + + for (j = 0; j < i; j++) { + void *fdptr = NULL; + + ret = pthread_join(threads[j], &fdptr); + if (ret) + ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr)); + } + } + pthread_attr_destroy(&attr); + + for (k = 0; k < ARRAY_SIZE(fds); k++) + close(fds[k]); + + exit(EXIT_SUCCESS); + } + + change_idmaps(syncfds, pid); + + ret = wait_for_pid(pid); + if (ret) + ksft_exit_fail_msg("wait_for_pid() failed"); +} + TEST(binderfs_test_privileged) { if (geteuid() != 0) @@ -264,10 +477,37 @@ TEST(binderfs_test_privileged) TEST(binderfs_test_unprivileged) { - change_to_userns(); + int ret; + int syncfds[2]; + pid_t pid; - if (__do_binderfs_test() == 1) - XFAIL(return, "The Android binderfs filesystem is not available"); + ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno)); + + pid = fork(); + if (pid < 0) { + close_prot_errno_disarm(syncfds[0]); + close_prot_errno_disarm(syncfds[1]); + ksft_exit_fail_msg("%s - Failed to fork", strerror(errno)); + } + + if (pid == 0) { + change_userns(syncfds); + if (__do_binderfs_test() == 1) + exit(2); + exit(EXIT_SUCCESS); + } + + change_idmaps(syncfds, pid); + + ret = wait_for_pid(pid); + if (ret) { + if (ret == 2) + XFAIL(return, "The Android binderfs filesystem is not available"); + else + ksft_exit_fail_msg("wait_for_pid() failed"); + } } TEST_HARNESS_MAIN -- cgit v1.2.3 From 564200ed8e71d91327d337e46bc778cee02da866 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Mar 2020 11:42:56 -0300 Subject: tools headers uapi: Update linux/in.h copy To get the changes in: 267762538705 ("seg6: fix SRv6 L2 tunnels to use IANA-assigned protocol number") That ends up automatically adding the new IPPROTO_ETHERNET to the socket args beautifiers: $ tools/perf/trace/beauty/socket_ipproto.sh > before Apply this patch: $ tools/perf/trace/beauty/socket_ipproto.sh > after $ diff -u before after --- before 2020-03-19 11:48:36.876673819 -0300 +++ after 2020-03-19 11:49:00.148541377 -0300 @@ -6,6 +6,7 @@ [132] = "SCTP", [136] = "UDPLITE", [137] = "MPLS", + [143] = "ETHERNET", [17] = "UDP", [1] = "ICMP", [22] = "IDP", $ Addresses this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: David S. Miller Cc: Paolo Lungaroni Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 1521073b6348..8533bf07450f 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ -- cgit v1.2.3 From 8c77f0ba41566b5a54a49c53107cc855d89ff4c1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 18 Mar 2020 11:13:45 -0400 Subject: selftest/ftrace: Fix function trigger test to handle trace not disabling the tracer The ftrace selftest "ftrace - test for function traceon/off triggers" enables all events and reads the trace file. Now that the trace file does not disable tracing, and will attempt to continually read new data that is added, the selftest gets stuck reading the trace file. This is because the data added to the trace file will fill up quicker than the reading of it. By only enabling scheduling events, the read can keep up with the writes. Instead of enabling all events, only enable the scheduler events. Link: http://lkml.kernel.org/r/20200318111345.0516642e@gandalf.local.home Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc index 0c04282d33dd..1947387fe976 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -41,7 +41,7 @@ fi echo '** ENABLE EVENTS' -echo 1 > events/enable +echo 1 > events/sched/enable echo '** ENABLE TRACING' enable_tracing -- cgit v1.2.3 From a0968a025c04702427a4aee2c618f451a5098cd8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Mar 2020 22:04:02 +1100 Subject: selftests/powerpc: Add a test of sigreturn vs VDSO There's two different paths through the sigreturn code, depending on whether the VDSO is mapped or not. We recently discovered a bug in the unmapped case, because it's not commonly used these days. So add a test that sends itself a signal, then moves the VDSO, takes another signal and finally unmaps the VDSO before sending itself another signal. That tests the standard signal path, the code that handles the VDSO being moved, and also the signal path in the case where the VDSO is unmapped. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200304110402.6038-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/signal/.gitignore | 1 + tools/testing/selftests/powerpc/signal/Makefile | 2 +- .../selftests/powerpc/signal/sigreturn_vdso.c | 127 +++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/signal/sigreturn_vdso.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index dca5852a1546..03dafa795255 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -1,3 +1,4 @@ signal signal_tm sigfuz +sigreturn_vdso diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 113838fbbe7f..63b57583e07d 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := signal signal_tm sigfuz +TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c new file mode 100644 index 000000000000..e282fff0fe25 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can take signals with and without the VDSO mapped, which trigger + * different paths in the signal handling code. + * + * See handle_rt_signal64() and setup_trampoline() in signal_64.c + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +// Ensure assert() is not compiled out +#undef NDEBUG +#include + +#include "utils.h" + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc = -1; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +static volatile sig_atomic_t took_signal = 0; + +static void sigusr1_handler(int sig) +{ + took_signal++; +} + +int test_sigreturn_vdso(void) +{ + unsigned long low, high, size; + struct sigaction act; + char *p; + + act.sa_handler = sigusr1_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + assert(sigaction(SIGUSR1, &act, NULL) == 0); + + // Confirm the VDSO is mapped, and work out where it is + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO is at 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + assert(took_signal == 1); + printf("Signal delivered OK with VDSO mapped\n"); + + // Remap the VDSO somewhere else + p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(p != MAP_FAILED); + assert(mremap((void *)low, size, size, MREMAP_MAYMOVE|MREMAP_FIXED, p) != MAP_FAILED); + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO moved to 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + assert(took_signal == 2); + printf("Signal delivered OK with VDSO moved\n"); + + assert(munmap((void *)low, size) == 0); + printf("Unmapped VDSO\n"); + + // Confirm the VDSO is not mapped anymore + assert(search_proc_maps("[vdso]", &low, &high) != 0); + + // Make the stack executable + assert(search_proc_maps("[stack]", &low, &high) == 0); + size = high - low + 1; + mprotect((void *)low, size, PROT_READ|PROT_WRITE|PROT_EXEC); + printf("Remapped the stack executable\n"); + + kill(getpid(), SIGUSR1); + assert(took_signal == 3); + printf("Signal delivered OK with VDSO unmapped\n"); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_vdso, "sigreturn_vdso"); +} -- cgit v1.2.3 From 915b7f6f9a5e232c138bb36743a1fdb0fcf2c432 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:30 -0300 Subject: selftests/powerpc: Add tm-signal-pagefault test This test triggers a TM Bad Thing by raising a signal in transactional state and forcing a pagefault to happen in kernelspace when the kernel signal handling code first touches the user signal stack. This is inspired by the test tm-signal-context-force-tm but uses userfaultfd to make the test deterministic. While this test always triggers the bug in one run, I had to execute tm-signal-context-force-tm several times (the test runs 5000 times each execution) to trigger the same bug. tm-signal-context-force-tm is kept instead of replaced because, while this test is more reliable and triggers the same bug, tm-signal-context-force-tm has a better coverage, in the sense that by running the test several times it might trigger the pagefault and/or be preempted at different places. v3: skip test if userfaultfd is unavailable. Signed-off-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-2-gustavold@linux.ibm.com --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 3 +- .../selftests/powerpc/tm/tm-signal-pagefault.c | 284 +++++++++++++++++++++ 3 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 98f2708d86cc..e1c72a4a3e91 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -13,6 +13,7 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-signal-context-force-tm tm-signal-sigreturn-nt +tm-signal-pagefault tm-vmx-unavail tm-unavailable tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index b15a1a325bd0..b1d99736f8b8 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ - tm-signal-context-force-tm tm-poison + tm-signal-context-force-tm tm-poison tm-signal-pagefault top_srcdir = ../../../../.. include ../../lib.mk @@ -22,6 +22,7 @@ $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 $(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64 +$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c new file mode 100644 index 000000000000..5908bc6abe60 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. + * + * This test starts a transaction and triggers a signal, forcing a pagefault to + * happen when the kernel signal handling code touches the user signal stack. + * + * In order to avoid pre-faulting the signal stack memory and to force the + * pagefault to happen precisely in the kernel signal handling code, the + * pagefault handling is done in userspace using the userfaultfd facility. + * + * Further pagefaults are triggered by crafting the signal handler's ucontext + * to point to additional memory regions managed by the userfaultfd, so using + * the same mechanism used to avoid pre-faulting the signal stack memory. + * + * On failure (bug is present) kernel crashes or never returns control back to + * userspace. If bug is not present, tests completes almost immediately. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" + + +#define UF_MEM_SIZE 655360 /* 10 x 64k pages */ + +/* Memory handled by userfaultfd */ +static char *uf_mem; +static size_t uf_mem_offset = 0; + +/* + * Data that will be copied into the faulting pages (instead of zero-filled + * pages). This is used to make the test more reliable and avoid segfaulting + * when we return from the signal handler. Since we are making the signal + * handler's ucontext point to newly allocated memory, when that memory is + * paged-in it will contain the expected content. + */ +static char backing_mem[UF_MEM_SIZE]; + +static size_t pagesize; + +/* + * Return a chunk of at least 'size' bytes of memory that will be handled by + * userfaultfd. If 'backing_data' is not NULL, its content will be save to + * 'backing_mem' and then copied into the faulting pages when the page fault + * is handled. + */ +void *get_uf_mem(size_t size, void *backing_data) +{ + void *ret; + + if (uf_mem_offset + size > UF_MEM_SIZE) { + fprintf(stderr, "Requesting more uf_mem than expected!\n"); + exit(EXIT_FAILURE); + } + + ret = &uf_mem[uf_mem_offset]; + + /* Save the data that will be copied into the faulting page */ + if (backing_data != NULL) + memcpy(&backing_mem[uf_mem_offset], backing_data, size); + + /* Reserve the requested amount of uf_mem */ + uf_mem_offset += size; + /* Keep uf_mem_offset aligned to the page size (round up) */ + uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); + + return ret; +} + +void *fault_handler_thread(void *arg) +{ + struct uffd_msg msg; /* Data read from userfaultfd */ + long uffd; /* userfaultfd file descriptor */ + struct uffdio_copy uffdio_copy; + struct pollfd pollfd; + ssize_t nread, offset; + + uffd = (long) arg; + + for (;;) { + pollfd.fd = uffd; + pollfd.events = POLLIN; + if (poll(&pollfd, 1, -1) == -1) { + perror("poll() failed"); + exit(EXIT_FAILURE); + } + + nread = read(uffd, &msg, sizeof(msg)); + if (nread == 0) { + fprintf(stderr, "read(): EOF on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + if (nread == -1) { + perror("read() failed"); + exit(EXIT_FAILURE); + } + + /* We expect only one kind of event */ + if (msg.event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "Unexpected event on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + /* + * We need to handle page faults in units of pages(!). + * So, round faulting address down to page boundary. + */ + uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); + + offset = (char *) uffdio_copy.dst - uf_mem; + uffdio_copy.src = (unsigned long) &backing_mem[offset]; + + uffdio_copy.len = pagesize; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { + perror("ioctl-UFFDIO_COPY failed"); + exit(EXIT_FAILURE); + } + } +} + +void setup_uf_mem(void) +{ + long uffd; /* userfaultfd file descriptor */ + pthread_t thr; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + int ret; + + pagesize = sysconf(_SC_PAGE_SIZE); + + /* Create and enable userfaultfd object */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + perror("userfaultfd() failed"); + exit(EXIT_FAILURE); + } + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + perror("ioctl-UFFDIO_API failed"); + exit(EXIT_FAILURE); + } + + /* + * Create a private anonymous mapping. The memory will be demand-zero + * paged, that is, not yet allocated. When we actually touch the memory + * the related page will be allocated via the userfaultfd mechanism. + */ + uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (uf_mem == MAP_FAILED) { + perror("mmap() failed"); + exit(EXIT_FAILURE); + } + + /* + * Register the memory range of the mapping we've just mapped to be + * handled by the userfaultfd object. In 'mode' we request to track + * missing pages (i.e. pages that have not yet been faulted-in). + */ + uffdio_register.range.start = (unsigned long) uf_mem; + uffdio_register.range.len = UF_MEM_SIZE; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + perror("ioctl-UFFDIO_REGISTER"); + exit(EXIT_FAILURE); + } + + /* Create a thread that will process the userfaultfd events */ + ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); + if (ret != 0) { + fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret); + exit(EXIT_FAILURE); + } +} + +/* + * Assumption: the signal was delivered while userspace was in transactional or + * suspended state, i.e. uc->uc_link != NULL. + */ +void signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + + /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ + ucp->uc_link->uc_mcontext.regs->nip += 4; + + ucp->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); + + ucp->uc_link->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); + + ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); +} + +bool have_userfaultfd(void) +{ + long rc; + + errno = 0; + rc = syscall(__NR_userfaultfd, -1); + + return rc == 0 || errno != ENOSYS; +} + +int tm_signal_pagefault(void) +{ + struct sigaction sa; + stack_t ss; + + SKIP_IF(!have_htm()); + SKIP_IF(!have_userfaultfd()); + + setup_uf_mem(); + + /* + * Set an alternative stack that will generate a page fault when the + * signal is raised. The page fault will be treated via userfaultfd, + * i.e. via fault_handler_thread. + */ + ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; + if (sigaltstack(&ss, NULL) == -1) { + perror("sigaltstack() failed"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = signal_handler; + if (sigaction(SIGTRAP, &sa, NULL) == -1) { + perror("sigaction() failed"); + exit(EXIT_FAILURE); + } + + /* Trigger a SIGTRAP in transactional state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "trap;" + "1: ;" + : : : "memory"); + + /* Trigger a SIGTRAP in suspended state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "tsuspend.;" + "trap;" + "tresume.;" + "1: ;" + : : : "memory"); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + /* + * Depending on kernel config, the TM Bad Thing might not result in a + * crash, instead the kernel never returns control back to userspace, so + * set a tight timeout. If the test passes it completes almost + * immediately. + */ + test_harness_set_timeout(2); + return test_harness(tm_signal_pagefault, "tm_signal_pagefault"); +} -- cgit v1.2.3 From 26d9f0cde24ca94ba8ee81be1070d6e1f83fcf8e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 19 Mar 2020 15:47:24 +0200 Subject: selftests: forwarding: Add an skbedit priority selftest Add a test that runs traffic through a port such that skbedit priority action acts on it during forwarding. Test that at egress, it is classified correctly according to the new priority at a PRIO qdisc. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/skbedit_priority.sh | 163 +++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/skbedit_priority.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh new file mode 100755 index 000000000000..0e7693297765 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by an action skbedit priority. The +# new priority should be taken into account when classifying traffic on the PRIO +# qdisc at $swp2. The test verifies that for different priority values, the +# traffic ends up in expected PRIO band. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | PRIO | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ingress + test_egress +" + +NUM_NETIFS=4 +source lib.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + tc qdisc add dev $swp2 root handle 10: \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 +} + +switch_destroy() +{ + tc qdisc del dev $swp2 root + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +test_skbedit_priority_one() +{ + local locus=$1; shift + local prio=$1; shift + local classid=$1; shift + + RET=0 + + tc filter add $locus handle 101 pref 1 \ + flower action skbedit priority $prio + + local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets) + $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q + local pkt1 + pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \ + qdisc_parent_stats_get $swp2 $classid .packets) + + check_err $? "Expected to get 10 packets on class $classid, but got +$((pkt1 - pkt0))." + log_test "$locus skbedit priority $prio -> classid $classid" + + tc filter del $locus pref 1 +} + +test_ingress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp1 ingress" \ + $prio 10:$((8 - prio)) + done +} + +test_egress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp2 egress" \ + $prio 10:$((8 - prio)) + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 081c54323b27d8d4b40df6b2375b9e1f6846d827 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Thu, 31 Oct 2019 21:14:22 +0800 Subject: tools/power turbostat: Support Cometlake From a turbostat point of view, Cometlake is like Kabylake. Suggested-by: Rui Zhang Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 31c1ca0bb3ee..dd5ac9f52ac5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4610,6 +4610,8 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: return INTEL_FAM6_SKYLAKE_L; case INTEL_FAM6_ICELAKE_L: -- cgit v1.2.3 From d8d005ba6afa502ca37ced5782f672c4d2fc1515 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 18:33:12 -0400 Subject: tools/power turbostat: Fix gcc build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warning: ‘__builtin_strncpy’ specified bound 20 equals destination size [-Wstringop-truncation] reduce param to strncpy, to guarantee that a null byte is always copied into destination buffer. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd5ac9f52ac5..fa95a8ca5565 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5325,9 +5325,9 @@ int add_counter(unsigned int msr_num, char *path, char *name, } msrp->msr_num = msr_num; - strncpy(msrp->name, name, NAME_BYTES); + strncpy(msrp->name, name, NAME_BYTES - 1); if (path) - strncpy(msrp->path, path, PATH_BYTES); + strncpy(msrp->path, path, PATH_BYTES - 1); msrp->width = width; msrp->type = type; msrp->format = format; -- cgit v1.2.3 From 4bf7132a0ace8888398af8cec6485ee4c6db5ea8 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:04:55 +0800 Subject: tools/power turbostat: Support Tiger Lake From a turbostat point of view, Tiger Lake looks like Ice Lake. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa95a8ca5565..d2c3f294da2d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4616,6 +4616,8 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE_NNPI: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_D: -- cgit v1.2.3 From 23274faf96500700da83c4f0ff12d78ae03d5604 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:06:49 +0800 Subject: tools/power turbostat: Support Ice Lake server From a turbostat point of view, Ice Lake server looks like Sky Lake server. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d2c3f294da2d..26088b2a27cc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4622,6 +4622,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ATOM_TREMONT_D: return INTEL_FAM6_ATOM_GOLDMONT_D; + + case INTEL_FAM6_ICELAKE_X: + return INTEL_FAM6_SKYLAKE_X; } return model; } -- cgit v1.2.3 From d7814c3098ddb2780bb66e787aa3949110dd4a41 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:09:45 +0800 Subject: tools/power turbostat: Support Jasper Lake Jasper Lake, like Elkhart Lake, uses a Tremont CPU. So reuse the code. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 26088b2a27cc..e953afb2e7a1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4623,6 +4623,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ATOM_TREMONT_D: return INTEL_FAM6_ATOM_GOLDMONT_D; + case INTEL_FAM6_ATOM_TREMONT_L: + return INTEL_FAM6_ATOM_TREMONT; + case INTEL_FAM6_ICELAKE_X: return INTEL_FAM6_SKYLAKE_X; } -- cgit v1.2.3 From f6708400707fec5cb56600710a9be59266df09ad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 18 Mar 2020 18:55:48 +0800 Subject: tools/power turbostat: Support Elkhart Lake From a turbostat point of view the Tremont-based Elkhart Lake is very similar to Goldmont, reuse the code of Goldmont. Elkhart Lake does not support 'group turbo limit counter' nor C3, adjust the code accordingly. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e953afb2e7a1..761146c4f9bc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3265,6 +3265,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3336,6 +3337,17 @@ int is_skx(unsigned int family, unsigned int model) } return 0; } +int is_ehl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3894,6 +3906,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model) else BIC_PRESENT(BIC_PkgWatt); break; + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + BIC_PRESENT(BIC_GFXWatt); + } + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4295,6 +4321,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ return 1; } return 0; @@ -4324,6 +4351,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_CANNONLAKE_L: /* CNL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ return 1; } return 0; @@ -4882,7 +4910,8 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || + is_ehl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) -- cgit v1.2.3 From 1f81c5efc020314b2db30d77efe228b7e117750d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 18:26:05 -0400 Subject: tools/power turbostat: Fix missing SYS_LPI counter on some Chromebooks Some Chromebook BIOS' do not export an ACPI LPIT, which is how Linux finds the residency counter for CPU and SYSTEM low power states, that is exports in /sys/devices/system/cpu/cpuidle/*residency_us When these sysfs attributes are missing, check the debugfs attrubte from the pmc_core driver, which accesses the same counter value. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 761146c4f9bc..3ecbf709a48c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -304,6 +304,10 @@ int *irqs_per_cpu; /* indexed by cpu_num */ void setup_all_buffers(void); +char *sys_lpi_file; +char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; +char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; + int cpu_is_not_present(int cpu) { return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); @@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void) * * record snapshot of * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us - * - * return 1 if config change requires a restart, else return 0 */ int snapshot_cpu_lpi_us(void) { @@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void) /* * snapshot_sys_lpi() * - * record snapshot of - * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us - * - * return 1 if config change requires a restart, else return 0 + * record snapshot of sys_lpi_file */ int snapshot_sys_lpi_us(void) { FILE *fp; int retval; - fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); + fp = fopen_or_die(sys_lpi_file, "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); if (retval != 1) { @@ -4946,10 +4945,16 @@ void process_cpuid() else BIC_NOT_PRESENT(BIC_CPU_LPI); - if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK)) + if (!access(sys_lpi_file_sysfs, R_OK)) { + sys_lpi_file = sys_lpi_file_sysfs; BIC_PRESENT(BIC_SYS_LPI); - else + } else if (!access(sys_lpi_file_debugfs, R_OK)) { + sys_lpi_file = sys_lpi_file_debugfs; + BIC_PRESENT(BIC_SYS_LPI); + } else { + sys_lpi_file_sysfs = NULL; BIC_NOT_PRESENT(BIC_SYS_LPI); + } if (!quiet) decode_misc_feature_control(); -- cgit v1.2.3 From fcaa681c03ea82193e60d7f2cdfd94fbbcd4cae9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 23:24:17 -0400 Subject: tools/power turbostat: Fix 32-bit capabilities warning warning: `turbostat' uses 32-bit capabilities (legacy support in use) Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 13f1e8b9ac52..2b6551269e43 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3ecbf709a48c..77f89371ec5f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include @@ -3150,28 +3150,42 @@ void check_dev_msr() err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } -void check_permissions() +/* + * check for CAP_SYS_RAWIO + * return 0 on success + * return 1 on fail + */ +int check_for_cap_sys_rawio(void) { - struct __user_cap_header_struct cap_header_data; - cap_user_header_t cap_header = &cap_header_data; - struct __user_cap_data_struct cap_data_data; - cap_user_data_t cap_data = &cap_data_data; - extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); - int do_exit = 0; - char pathname[32]; + cap_t caps; + cap_flag_value_t cap_flag_value; - /* check for CAP_SYS_RAWIO */ - cap_header->pid = getpid(); - cap_header->version = _LINUX_CAPABILITY_VERSION; - if (capget(cap_header, cap_data) < 0) - err(-6, "capget(2) failed"); + caps = cap_get_proc(); + if (caps == NULL) + err(-6, "cap_get_proc\n"); - if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { - do_exit++; + if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) + err(-6, "cap_get\n"); + + if (cap_flag_value != CAP_SET) { warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname); + return 1; } + if (cap_free(caps) == -1) + err(-6, "cap_free\n"); + + return 0; +} +void check_permissions(void) +{ + int do_exit = 0; + char pathname[32]; + + /* check for CAP_SYS_RAWIO */ + do_exit += check_for_cap_sys_rawio(); + /* test file permissions */ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); if (euidaccess(pathname, R_OK)) { -- cgit v1.2.3 From 07f24dc95daca49b8a2e804edc024dd4e91610ac Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 19 Mar 2020 14:30:56 -0700 Subject: selftests/x86/vdso: Fix no-vDSO segfaults test_vdso would try to call a NULL pointer if the vDSO was missing. vdso_restorer_32 hit a genuine failure: trying to use the kernel-provided signal restorer doesn't work if the vDSO is missing. Skip the test if the vDSO is missing, since the test adds no particular value in that case. Reported-by: kbuild test robot Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/618ea7b8c55b10d08b1cb139e9a3a957934b8647.1584653439.git.luto@kernel.org --- tools/testing/selftests/x86/test_vdso.c | 5 +++++ tools/testing/selftests/x86/vdso_restorer.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 35edd61d1663..42052db0f870 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -259,6 +259,11 @@ static void test_one_clock_gettime(int clock, const char *name) static void test_clock_gettime(void) { + if (!vdso_clock_gettime) { + printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n"); + return; + } + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); clock++) { test_one_clock_gettime(clock, clocknames[clock]); diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c index 29a5c94c4b50..fe99f2434155 100644 --- a/tools/testing/selftests/x86/vdso_restorer.c +++ b/tools/testing/selftests/x86/vdso_restorer.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -46,11 +47,23 @@ int main() int nerrs = 0; struct real_sigaction sa; + void *vdso = dlopen("linux-vdso.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[SKIP]\tFailed to find vDSO. Tests are not expected to work.\n"); + return 0; + } + memset(&sa, 0, sizeof(sa)); sa.handler = handler_with_siginfo; sa.flags = SA_SIGINFO; sa.restorer = NULL; /* request kernel-provided restorer */ + printf("[RUN]\tRaise a signal, SA_SIGINFO, sa.restorer == NULL\n"); + if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0) err(1, "raw rt_sigaction syscall"); @@ -63,6 +76,8 @@ int main() nerrs++; } + printf("[RUN]\tRaise a signal, !SA_SIGINFO, sa.restorer == NULL\n"); + sa.flags = 0; sa.handler = handler_without_siginfo; if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0) -- cgit v1.2.3 From 630b99ab60aa972052a4202a1ff96c7e45eb0054 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 12 Mar 2020 15:35:51 -0700 Subject: selftests/x86/ptrace_syscall_32: Fix no-vDSO segfault If AT_SYSINFO is not present, don't try to call a NULL pointer. Reported-by: kbuild test robot Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/faaf688265a7e1a5b944d6f8bc0f6368158306d3.1584052409.git.luto@kernel.org --- tools/testing/selftests/x86/ptrace_syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 6f22238f3217..12aaa063196e 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -414,8 +414,12 @@ int main() #if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16) vsyscall32 = (void *)getauxval(AT_SYSINFO); - printf("[RUN]\tCheck AT_SYSINFO return regs\n"); - test_sys32_regs(do_full_vsyscall32); + if (vsyscall32) { + printf("[RUN]\tCheck AT_SYSINFO return regs\n"); + test_sys32_regs(do_full_vsyscall32); + } else { + printf("[SKIP]\tAT_SYSINFO is not available\n"); + } #endif test_ptrace_syscall_restart(); -- cgit v1.2.3 From 8ddbda76245f5d10e00020db34455404019efc91 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 3 Mar 2020 14:50:43 -0800 Subject: tools/power/x86/intel-speed-select: Fix mailbox usage for CLOS_PM_QOS_CONFIG Even for the products using MMIO, this message needs to be sent via mail box. The previous fix done for this didn't properly address this. That fix simply removed sending command via MMIO, but still didn't trigger sending via mailbox. Add additional condition to check for CLOS_PM_QOS_CONFIG, when MMIO is supported on a platform. Fixes: cd0e63706549 (tools/power/x86/intel-speed-select: Use mailbox for CLOS_PM_QOS_CONFIG) Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 0cf3548681f8..50db0cd23d8c 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -572,7 +572,8 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command, "mbox_send: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n", cpu, command, sub_command, parameter, req_data); - if (isst_platform_info.mmio_supported && command == CONFIG_CLOS) { + if (isst_platform_info.mmio_supported && command == CONFIG_CLOS && + sub_command != CLOS_PM_QOS_CONFIG) { unsigned int value; int write = 0; int clos_id, core_id, ret = 0; -- cgit v1.2.3 From ced2f5304d1409805edd661b5f32092f3878be05 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 3 Mar 2020 14:50:44 -0800 Subject: tools/power/x86/intel-speed-select: Fix last cpu number Here topology_max_cpus is used for total CPU count, not the last CPU number. So remove "-1". Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 50db0cd23d8c..c922cfd7ba50 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -313,7 +313,6 @@ static void set_max_cpu_num(void) while (fscanf(filep, "%lx,", &dummy) == 1) topo_max_cpus += BITMASK_SIZE; fclose(filep); - topo_max_cpus--; /* 0 based */ debug_printf("max cpus %d\n", topo_max_cpus); } -- cgit v1.2.3 From f0e0b4d17baaaf4968664f8fe79c66a7417aaab0 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:12 -0800 Subject: tools/power/x86/intel-speed-select: Warn for invalid package id When CPU is offline, we can't get package id. So print error for this and don't use output. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index c922cfd7ba50..3f019f4f90df 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -261,6 +261,10 @@ static void for_each_online_package_in_set(void (*callback)(int, void *, void *, if (die_id < 0) die_id = 0; pkg_id = get_physical_package_id(i); + if (pkg_id < 0) { + fprintf(stderr, "Failed to get package id, CPU %d may be offline\n", i); + continue; + } /* Create an unique id for package, die combination to store */ pkg_id = (MAX_PACKAGE_COUNT * pkg_id + die_id); @@ -362,6 +366,10 @@ static void set_cpu_present_cpu_mask(void) die_id = 0; pkg_id = get_physical_package_id(i); + if (pkg_id < 0) { + fprintf(stderr, "Failed to get package id, CPU %d may be offline\n", i); + continue; + } if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) { int core_id = get_physical_core_id(i); -- cgit v1.2.3 From f5205f493100f1cdbecdf5498abb4c3dbdce08c5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:13 -0800 Subject: tools/power/x86/intel-speed-select: Make target CPU optional for core-power info Currently "-c" is a mandatory option for "core-power info" command. Make this optional as this is a per package/die property. When not specified, it will print info for every package/die. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 3f019f4f90df..c0de18e10cda 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1763,19 +1763,17 @@ static void dump_clos_info(int arg) if (cmd_help) { fprintf(stderr, "Print Intel Speed Select Technology core power information\n"); - fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n"); - exit(0); - } - - if (!max_target_cpus) { - fprintf(stderr, - "Invalid target cpu. Specify with [-c|--cpu]\n"); + fprintf(stderr, "\t Optionally specify targeted cpu id with [--cpu|-c]\n"); exit(0); } isst_ctdp_display_information_start(outf); - for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL, - NULL, NULL, NULL); + if (max_target_cpus) + for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL, + NULL, NULL, NULL); + else + for_each_online_package_in_set(get_clos_info_for_cpu, NULL, + NULL, NULL, NULL); isst_ctdp_display_information_end(outf); } -- cgit v1.2.3 From 143ad32209af647c76b4ce06144eb093375a8562 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:14 -0800 Subject: tools/power/x86/intel-speed-select: Enhance core-power info command In addition to CLOS enable status, also show the core-power feature status. This will help why clos enable status didn't give desired results as the core-power feature may be disabled or unsupported. The new display looks as follows: $intel-speed-select core-power info Intel(R) Speed Select Technology .. package-0 die-0 cpu-0 core-power support-status:supported enable-status:enabled clos-enable-status:1 priority-type:0 In the above display "support-status" and "enable-status", shows the status of the core-power feature and "clos-enable-status", shows the status of the clos. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 9 +++++++-- tools/power/x86/intel-speed-select/isst-display.c | 17 ++++++++++++++++- tools/power/x86/intel-speed-select/isst.h | 4 +++- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index c0de18e10cda..405b03cba248 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1754,8 +1754,13 @@ static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_clos_get_clos_information(cpu, &enable, &prio_type); if (ret) perror("isst_clos_get_info"); - else - isst_clos_display_clos_information(cpu, outf, enable, prio_type); + else { + int cp_state, cp_cap; + + isst_read_pm_config(cpu, &cp_state, &cp_cap); + isst_clos_display_clos_information(cpu, outf, enable, prio_type, + cp_state, cp_cap); + } } static void dump_clos_info(int arg) diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 3d70be2a9f61..30377acb909d 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -589,7 +589,8 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos, } void isst_clos_display_clos_information(int cpu, FILE *outf, - int clos_enable, int type) + int clos_enable, int type, + int state, int cap) { char header[256]; char value[256]; @@ -605,7 +606,21 @@ void isst_clos_display_clos_information(int cpu, FILE *outf, snprintf(header, sizeof(header), "core-power"); format_and_print(outf, 4, header, NULL); + snprintf(header, sizeof(header), "support-status"); + if (cap) + snprintf(value, sizeof(value), "supported"); + else + snprintf(value, sizeof(value), "unsupported"); + format_and_print(outf, 5, header, value); + snprintf(header, sizeof(header), "enable-status"); + if (state) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + format_and_print(outf, 5, header, value); + + snprintf(header, sizeof(header), "clos-enable-status"); snprintf(value, sizeof(value), "%d", clos_enable); format_and_print(outf, 5, header, value); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index ad5aa6341d0f..53e147a9a295 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -245,7 +245,9 @@ extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd, extern int isst_clos_get_clos_information(int cpu, int *enable, int *type); extern void isst_clos_display_clos_information(int cpu, FILE *outf, - int clos_enable, int type); + int clos_enable, int type, + int state, int cap); extern int is_clx_n_platform(void); extern int get_cpufreq_base_freq(int cpu); +extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap); #endif -- cgit v1.2.3 From 6320c9fb91212bc951b3a2d2cc2fceb1de4aea43 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:15 -0800 Subject: tools/power/x86/intel-speed-select: Use more verbiage for clos information Instead of displaying 0 and 1 for enable status, display "disabled" and "enabled" respectively. Similarly for priority type, display "ordered or proportional" instead of 0 and 1. An example display: $intel-speed-select -c 1 core-power info Intel(R) Speed Select Technology .. package-0 die-0 cpu-1 core-power support-status:supported enable-status:enabled clos-enable-status:enabled priority-type:proportional Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-display.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 30377acb909d..ec737e101e52 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -621,11 +621,17 @@ void isst_clos_display_clos_information(int cpu, FILE *outf, format_and_print(outf, 5, header, value); snprintf(header, sizeof(header), "clos-enable-status"); - snprintf(value, sizeof(value), "%d", clos_enable); + if (clos_enable) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); format_and_print(outf, 5, header, value); snprintf(header, sizeof(header), "priority-type"); - snprintf(value, sizeof(value), "%d", type); + if (type) + snprintf(value, sizeof(value), "ordered"); + else + snprintf(value, sizeof(value), "proportional"); format_and_print(outf, 5, header, value); format_and_print(outf, 1, NULL, NULL); -- cgit v1.2.3 From 696691985c3191f2c4c76f09b1c7c5f3ca99d5ed Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:16 -0800 Subject: tools/power/x86/intel-speed-select: Special handling for CPU 0 online/offline When "-o" option for force online/offline is used with command: perf-profile set-config-level If the config level calls for CPU 0 online/offline, then call fails as there is special kernel setup required for CPU 0 online/offline and the currently not setup for that. But when call is for online CPU 0, then don't fail. Just warn that this system is not setup for CPU 0 online/offline. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 405b03cba248..b8f4846d7c78 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -220,8 +220,14 @@ static void set_cpu_online_offline(int cpu, int state) "/sys/devices/system/cpu/cpu%d/online", cpu); fd = open(buffer, O_WRONLY); - if (fd < 0) + if (fd < 0) { + if (!cpu && state) { + fprintf(stderr, "This system is not configured for CPU 0 online/offline\n"); + fprintf(stderr, "Ignoring online request for CPU 0 as this is already online\n"); + return; + } err(-1, "%s open failed", buffer); + } if (state) ret = write(fd, "1\n", 2); -- cgit v1.2.3 From 864dc09e692f55fc7a5a87c7411da0a348ac1094 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:17 -0800 Subject: tools/power/x86/intel-speed-select: Max CPU count calculation when CPU0 is offline Currently /sys/devices/system/cpu/cpu0/topology/thread_siblings is used to get the max CPU count. But when CPU0 is offline, then this file will be absent. So add processing so that we can get count from any first CPU in the system. which is online. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index b8f4846d7c78..c773c6cc02a5 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -316,10 +316,24 @@ static void set_max_cpu_num(void) { FILE *filep; unsigned long dummy; + int i; topo_max_cpus = 0; - filep = fopen_or_exit( - "/sys/devices/system/cpu/cpu0/topology/thread_siblings", "r"); + for (i = 0; i < 256; ++i) { + char path[256]; + + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i); + filep = fopen(path, "r"); + if (filep) + break; + } + + if (!filep) { + fprintf(stderr, "Can't get max cpu number\n"); + exit(0); + } + while (fscanf(filep, "%lx,", &dummy) == 1) topo_max_cpus += BITMASK_SIZE; fclose(filep); -- cgit v1.2.3 From fb186158283958984fecf7c1bbc0833dc76e375b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:18 -0800 Subject: tools/power/x86/intel-speed-select: Store topology information Once the CPU is offline, the topology information (core-id, package-id, die-id) is not accessible via sysfs. So when user selects a config level more than base config 0 and offlined CPUs to match the config level, to return to base config he has to manually online CPUs before. Without this CPUs information mapping from Punit CPU numbering will lot work as it needs atlest package id for each CPU. To avoid this additional steps store the topology information in a file , which is created on the very first run after boot. Since system boots in base config and all CPUs are online, we can get information about every CPU. Once any of the APIs like get_physical_package_id(), get_physical_core_id() or get_physical_die_id() fails to read from sysfs, read from the stored mapping file. This mapping file is stored in /tmp file system. so on every boot it is recreated to make sure that any new CPUs are added to the system before boot are taken into account. But don't use the stored physical device id when trying to get information for CPU to send message in for_each_online_package_in_set(). Here use the real value from syfs and in case fails try the next CPU. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 143 +++++++++++++++++++++-- 1 file changed, 132 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index c773c6cc02a5..4773970cda9b 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -62,6 +62,13 @@ struct _cpu_map { }; struct _cpu_map *cpu_map; +struct cpu_topology { + short cpu; + short core_id; + short pkg_id; + short die_id; +}; + void debug_printf(const char *format, ...) { va_list args; @@ -176,25 +183,137 @@ int out_format_is_json(void) return out_format_json; } +static int get_stored_topology_info(int cpu, int *core_id, int *pkg_id, int *die_id) +{ + const char *pathname = "/tmp/isst_cpu_topology.dat"; + struct cpu_topology cpu_top; + FILE *fp; + int ret; + + fp = fopen(pathname, "rb"); + if (!fp) + return -1; + + ret = fseek(fp, cpu * sizeof(cpu_top), SEEK_SET); + if (ret) + goto err_ret; + + ret = fread(&cpu_top, sizeof(cpu_top), 1, fp); + if (ret != 1) { + ret = -1; + goto err_ret; + } + + *pkg_id = cpu_top.pkg_id; + *core_id = cpu_top.core_id; + *die_id = cpu_top.die_id; + ret = 0; + +err_ret: + fclose(fp); + + return ret; +} + +static void store_cpu_topology(void) +{ + const char *pathname = "/tmp/isst_cpu_topology.dat"; + FILE *fp; + int i; + + fp = fopen(pathname, "rb"); + if (fp) { + /* Mapping already exists */ + fclose(fp); + return; + } + + fp = fopen(pathname, "wb"); + if (!fp) { + fprintf(stderr, "Can't create file:%s\n", pathname); + return; + } + + for (i = 0; i < topo_max_cpus; ++i) { + struct cpu_topology cpu_top; + + cpu_top.core_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/core_id", i); + if (cpu_top.core_id < 0) + cpu_top.core_id = -1; + + cpu_top.pkg_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i); + if (cpu_top.pkg_id < 0) + cpu_top.pkg_id = -1; + + cpu_top.die_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/die_id", i); + if (cpu_top.die_id < 0) + cpu_top.die_id = -1; + + cpu_top.cpu = i; + + if (fwrite(&cpu_top, sizeof(cpu_top), 1, fp) != 1) { + fprintf(stderr, "Can't write to:%s\n", pathname); + break; + } + } + + fclose(fp); +} + int get_physical_package_id(int cpu) { - return parse_int_file( - 0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", - cpu); + int ret; + + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) + return pkg_id; + } + + return ret; } int get_physical_core_id(int cpu) { - return parse_int_file( - 0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); + int ret; + + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/core_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) + return core_id; + } + + return ret; } int get_physical_die_id(int cpu) { int ret; - ret = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/topology/die_id", - cpu); + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/die_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) + return die_id; + } + if (ret < 0) ret = 0; @@ -266,11 +385,12 @@ static void for_each_online_package_in_set(void (*callback)(int, void *, void *, die_id = get_physical_die_id(i); if (die_id < 0) die_id = 0; - pkg_id = get_physical_package_id(i); - if (pkg_id < 0) { - fprintf(stderr, "Failed to get package id, CPU %d may be offline\n", i); + + pkg_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i); + if (pkg_id < 0) continue; - } + /* Create an unique id for package, die combination to store */ pkg_id = (MAX_PACKAGE_COUNT * pkg_id + die_id); @@ -2352,6 +2472,7 @@ static void cmdline(int argc, char **argv) printf("Intel(R) Speed Select Technology\n"); printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model); set_max_cpu_num(); + store_cpu_topology(); set_cpu_present_cpu_mask(); set_cpu_target_cpu_mask(); -- cgit v1.2.3 From f362cdccca07c2b90fc4fac9bf13d9ce975e9aa7 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:19 -0800 Subject: tools/power/x86/intel-speed-select: Helpful warning for missing kernel interface When the device file "/dev/isst_interface" is not present, instead of failing on access, check at the start and print a helpful warning. Here CLX platform is an exception, which doesn't depend on the device file. So continue for CLX platform. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 34 +++++++++++++++++------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 4773970cda9b..9bf461e57a34 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -2408,6 +2408,8 @@ static void print_version(void) static void cmdline(int argc, char **argv) { + const char *pathname = "/dev/isst_interface"; + FILE *fp; int opt; int option_index = 0; int ret; @@ -2423,6 +2425,28 @@ static void cmdline(int argc, char **argv) { 0, 0, 0, 0 } }; + if (geteuid() != 0) { + fprintf(stderr, "Must run as root\n"); + exit(0); + } + + ret = update_cpu_model(); + if (ret) + err(-1, "Invalid CPU model (%d)\n", cpu_model); + printf("Intel(R) Speed Select Technology\n"); + printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model); + + if (!is_clx_n_platform()) { + fp = fopen(pathname, "rb"); + if (!fp) { + fprintf(stderr, "Intel speed select drivers are not loaded on this system.\n"); + fprintf(stderr, "Verify that kernel config includes CONFIG_INTEL_SPEED_SELECT_INTERFACE.\n"); + fprintf(stderr, "If the config is included then this is not a supported platform.\n"); + exit(0); + } + fclose(fp); + } + progname = argv[0]; while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options, &option_index)) != -1) { @@ -2457,20 +2481,10 @@ static void cmdline(int argc, char **argv) } } - if (geteuid() != 0) { - fprintf(stderr, "Must run as root\n"); - exit(0); - } - if (optind > (argc - 2)) { fprintf(stderr, "Feature name and|or command not specified\n"); exit(0); } - ret = update_cpu_model(); - if (ret) - err(-1, "Invalid CPU model (%d)\n", cpu_model); - printf("Intel(R) Speed Select Technology\n"); - printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model); set_max_cpu_num(); store_cpu_topology(); set_cpu_present_cpu_mask(); -- cgit v1.2.3 From addd116d8daf941485bf68eff22f43ff6525b76d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:20 -0800 Subject: tools/power/x86/intel-speed-select: Enhance help Enhance help message which adds some example. The changes include: - Print help when options are not recognized. - For CLX, display only options which are applicable. - Sort options in alphatical order. - Disply help() instead of error: "Feature name and|or command not specified" - Remove duplicate display of Intel(R) Speed Select Technology Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 29 ++++++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 9bf461e57a34..5302a552f87f 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -2368,11 +2368,18 @@ void process_command(int argc, char **argv, static void usage(void) { - printf("Intel(R) Speed Select Technology\n"); + if (is_clx_n_platform()) { + fprintf(stderr, "\nThere is limited support of Intel Speed Select features on this platform.\n"); + fprintf(stderr, "Everything is pre-configured using BIOS options, this tool can't enable any feature in the hardware.\n\n"); + } + printf("\nUsage:\n"); printf("intel-speed-select [OPTIONS] FEATURE COMMAND COMMAND_ARGUMENTS\n"); - printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features,\n"); - printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n"); + printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features:\n"); + if (is_clx_n_platform()) + printf("\nFEATURE : [perf-profile|base-freq]\n"); + else + printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n"); printf("\nFor help on each feature, use -h|--help\n"); printf("\tFor example: intel-speed-select perf-profile -h\n"); @@ -2385,17 +2392,29 @@ static void usage(void) printf("\t\tDefault: Die scoped for all dies in the system with multiple dies/package\n"); printf("\t\t\t Or Package scoped for all Packages when each package contains one die\n"); printf("\t[-d|--debug] : Debug mode\n"); + printf("\t[-f|--format] : output format [json|text]. Default: text\n"); printf("\t[-h|--help] : Print help\n"); printf("\t[-i|--info] : Print platform information\n"); printf("\t[-o|--out] : Output file\n"); printf("\t\t\tDefault : stderr\n"); - printf("\t[-f|--format] : output format [json|text]. Default: text\n"); printf("\t[-v|--version] : Print version\n"); printf("\nResult format\n"); printf("\tResult display uses a common format for each command:\n"); printf("\tResults are formatted in text/JSON with\n"); printf("\t\tPackage, Die, CPU, and command specific results.\n"); + + printf("\nExamples\n"); + printf("\tTo get platform information:\n"); + printf("\t\tintel-speed-select --info\n"); + printf("\tTo get full perf-profile information dump:\n"); + printf("\t\tintel-speed-select perf-profile info\n"); + printf("\tTo get full base-freq information dump:\n"); + printf("\t\tintel-speed-select base-freq info -l 0\n"); + if (!is_clx_n_platform()) { + printf("\tTo get full turbo-freq information dump:\n"); + printf("\t\tintel-speed-select turbo-freq info -l 0\n"); + } exit(1); } @@ -2482,7 +2501,7 @@ static void cmdline(int argc, char **argv) } if (optind > (argc - 2)) { - fprintf(stderr, "Feature name and|or command not specified\n"); + usage(); exit(0); } set_max_cpu_num(); -- cgit v1.2.3 From 1ba148ae9e11324817685a8b13f5968c4bc1cbfd Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:21 -0800 Subject: tools/power/x86/intel-speed-select: Enhance --info option Add additional information, which will allow user to detect available features. This will allow users to check presence of features before continue to test. A sample output: $sudo ./intel-speed-select --info Intel(R) Speed Select Technology Executing on CPU model:85[0x55] Platform: API version : 1 Platform: Driver version : 1 Platform: mbox supported : 1 Platform: mmio supported : 0 Intel(R) SST-PP (feature perf-profile) is not supported Only performance level 0 (base level) is present TDP level change control is locked Intel(R) SST-TF (feature turbo-freq) is supported Intel(R) SST-BF (feature base-freq) is supported Intel(R) SST-CP (feature core-power) is supported Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 74 ++++++++++++++++++++++++ tools/power/x86/intel-speed-select/isst.h | 2 + 2 files changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5302a552f87f..65110d06394f 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -846,12 +846,85 @@ static int isst_fill_platform_info(void) return 0; } +static void isst_print_extended_platform_info(void) +{ + int cp_state, cp_cap, fact_support = 0, pbf_support = 0; + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + int ret, i, j; + FILE *filep; + + for (i = 0; i < 256; ++i) { + char path[256]; + + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i); + filep = fopen(path, "r"); + if (filep) + break; + } + + if (!filep) + return; + + fclose(filep); + + ret = isst_get_ctdp_levels(i, &pkg_dev); + if (ret) + return; + + if (pkg_dev.enabled) { + fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is supported\n"); + } else { + fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is not supported\n"); + fprintf(outf, "Only performance level 0 (base level) is present\n"); + } + + if (pkg_dev.locked) + fprintf(outf, "TDP level change control is locked\n"); + else + fprintf(outf, "TDP level change control is unlocked, max level: %d \n", pkg_dev.levels); + + for (j = 0; j <= pkg_dev.levels; ++j) { + ret = isst_get_ctdp_control(i, j, &ctdp_level); + if (ret) + continue; + + if (!fact_support && ctdp_level.fact_support) + fact_support = 1; + + if (!pbf_support && ctdp_level.pbf_support) + pbf_support = 1; + } + + if (fact_support) + fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is supported\n"); + else + fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is not supported\n"); + + if (pbf_support) + fprintf(outf, "Intel(R) SST-BF (feature base-freq) is supported\n"); + else + fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n"); + + ret = isst_read_pm_config(i, &cp_state, &cp_cap); + if (cp_cap) + fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n"); + else + fprintf(outf, "Intel(R) SST-CP (feature core-power) is not supported\n"); +} + static void isst_print_platform_information(void) { struct isst_if_platform_info platform_info; const char *pathname = "/dev/isst_interface"; int fd; + if (is_clx_n_platform()) { + fprintf(stderr, "\nThis option in not supported on this platform\n"); + exit(0); + } + fd = open(pathname, O_RDWR); if (fd < 0) err(-1, "%s open failed", pathname); @@ -867,6 +940,7 @@ static void isst_print_platform_information(void) platform_info.mbox_supported); fprintf(outf, "Platform: mmio supported : %d\n", platform_info.mmio_supported); + isst_print_extended_platform_info(); } close(fd); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 53e147a9a295..639d3d649480 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -196,6 +196,8 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command, int write, unsigned long long *req_resp); extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev); +extern int isst_get_ctdp_control(int cpu, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level); extern int isst_get_coremask_info(int cpu, int config_index, struct isst_pkg_ctdp_level_info *ctdp_level); extern int isst_get_process_ctdp(int cpu, int tdp_level, -- cgit v1.2.3 From 87e115b3256c1c5a324ab495eab722c0ec7d5c34 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:22 -0800 Subject: tools/power/x86/intel-speed-select: Add an API for error/information print Add a common API which can be used to print all error and information messages. In this way a common format can be used. For json output an error index in suffixed to make unique error key. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 5 +++ tools/power/x86/intel-speed-select/isst-display.c | 44 +++++++++++++++++++++++ tools/power/x86/intel-speed-select/isst.h | 2 ++ 3 files changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 65110d06394f..360fa00f9c7a 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -69,6 +69,11 @@ struct cpu_topology { short die_id; }; +FILE *get_output_file(void) +{ + return outf; +} + void debug_printf(const char *format, ...) { va_list args; diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index ec737e101e52..943226d1dd4e 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -515,15 +515,18 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, format_and_print(outf, 1, NULL, NULL); } +static int start; void isst_ctdp_display_information_start(FILE *outf) { last_level = 0; format_and_print(outf, 0, "start", NULL); + start = 1; } void isst_ctdp_display_information_end(FILE *outf) { format_and_print(outf, 0, NULL, NULL); + start = 0; } void isst_pbf_display_information(int cpu, FILE *outf, int level, @@ -686,3 +689,44 @@ void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd, format_and_print(outf, 1, NULL, NULL); } + +void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg) +{ + FILE *outf = get_output_file(); + static int error_index; + char header[256]; + char value[256]; + + if (!out_format_is_json()) { + if (arg_valid) + snprintf(value, sizeof(value), "%s %d", msg, arg); + else + snprintf(value, sizeof(value), "%s", msg); + + if (error) + fprintf(outf, "Error: %s\n", value); + else + fprintf(outf, "Information: %s\n", value); + return; + } + + if (!start) + format_and_print(outf, 0, "start", NULL); + + if (error) + snprintf(header, sizeof(header), "Error%d", error_index++); + else + snprintf(header, sizeof(header), "Information:%d", error_index++); + format_and_print(outf, 1, header, NULL); + + snprintf(header, sizeof(header), "message"); + if (arg_valid) + snprintf(value, sizeof(value), "%s %d", msg, arg); + else + snprintf(value, sizeof(value), "%s", msg); + + format_and_print(outf, 2, header, value); + format_and_print(outf, 1, NULL, NULL); + if (!start) + format_and_print(outf, 0, NULL, NULL); +} diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 639d3d649480..4950d2368ff8 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -172,6 +172,7 @@ extern int get_cpu_count(int pkg_id, int die_id); extern int get_core_count(int pkg_id, int die_id); /* Common interfaces */ +FILE *get_output_file(void); extern void debug_printf(const char *format, ...); extern int out_format_is_json(void); extern int get_physical_package_id(int cpu); @@ -252,4 +253,5 @@ extern void isst_clos_display_clos_information(int cpu, FILE *outf, extern int is_clx_n_platform(void); extern int get_cpufreq_base_freq(int cpu); extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap); +extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg); #endif -- cgit v1.2.3 From ac9d05ea4cfb99dd24767f2510ca526282fffa13 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:23 -0800 Subject: tools/power/x86/intel-speed-select: Improve error display for perf-profile feature This change adds improved error display and handling for commands related to perf-profile feature. The changes include: - When invalid TDP level is passed. display error and exit - Replace perror with helpful error message - Show error when TDP level can't be set - Print error when information can't be read for a level - Validate user options for invalid level - Display error for TDP lock status Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 21 +++++++++++---- tools/power/x86/intel-speed-select/isst-core.c | 33 ++++++++++++++++++++---- 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 360fa00f9c7a..4230a19664d3 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1130,6 +1130,11 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2, { int ret; + if (tdp_level != 0xff && tdp_level != 0) { + isst_display_error_info_message(1, "Invalid level", 1, tdp_level); + exit(0); + } + ret = clx_n_config(cpu); if (ret) { perror("isst_get_process_ctdp"); @@ -1154,7 +1159,9 @@ static void dump_isst_config_for_cpu(int cpu, void *arg1, void *arg2, memset(&pkg_dev, 0, sizeof(pkg_dev)); ret = isst_get_process_ctdp(cpu, tdp_level, &pkg_dev); if (ret) { - perror("isst_get_process_ctdp"); + isst_display_error_info_message(1, "Failed to get perf-profile info on cpu", 1, cpu); + isst_ctdp_display_information_end(outf); + exit(1); } else { isst_ctdp_display_information(cpu, outf, tdp_level, &pkg_dev); isst_get_process_ctdp_complete(cpu, &pkg_dev); @@ -1197,9 +1204,11 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int ret; ret = isst_set_tdp_level(cpu, tdp_level); - if (ret) - perror("set_tdp_level_for_cpu"); - else { + if (ret) { + isst_display_error_info_message(1, "Set TDP level failed", 0, 0); + isst_ctdp_display_information_end(outf); + exit(1); + } else { isst_display_result(cpu, outf, "perf-profile", "set_tdp_level", ret); if (force_online_offline) { @@ -1237,11 +1246,13 @@ static void set_tdp_level(int arg) "\t Arguments: -l|--level : Specify tdp level\n"); fprintf(stderr, "\t Optional Arguments: -o | online : online/offline for the tdp level\n"); + fprintf(stderr, + "\t online/offline operation has limitations, refer to Linux hotplug documentation\n"); exit(0); } if (tdp_level == 0xff) { - fprintf(outf, "Invalid command: specify tdp_level\n"); + isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0); exit(1); } isst_ctdp_display_information_start(outf); diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 81a119f688a3..2f3921cd87f3 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -114,8 +114,10 @@ int isst_get_tdp_info(int cpu, int config_index, ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_TDP_INFO, 0, config_index, &resp); - if (ret) + if (ret) { + isst_display_error_info_message(1, "Invalid level, Can't get TDP information at level", 1, config_index); return ret; + } ctdp_level->pkg_tdp = resp & GENMASK(14, 0); ctdp_level->tdp_ratio = (resp & GENMASK(23, 16)) >> 16; @@ -352,7 +354,7 @@ int isst_set_tdp_level_msr(int cpu, int tdp_level) debug_printf("cpu: tdp_level via MSR %d\n", cpu, tdp_level); if (isst_get_config_tdp_lock_status(cpu)) { - debug_printf("cpu: tdp_locked %d\n", cpu); + isst_display_error_info_message(1, "tdp_locked", 0, 0); return -1; } @@ -373,10 +375,19 @@ int isst_set_tdp_level(int cpu, int tdp_level) unsigned int resp; int ret; + + if (isst_get_config_tdp_lock_status(cpu)) { + isst_display_error_info_message(1, "TDP is locked", 0, 0); + return -1; + + } + ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_SET_LEVEL, 0, tdp_level, &resp); - if (ret) - return isst_set_tdp_level_msr(cpu, tdp_level); + if (ret) { + isst_display_error_info_message(1, "Set TDP level failed for level", 1, tdp_level); + return ret; + } return 0; } @@ -671,7 +682,7 @@ void isst_get_process_ctdp_complete(int cpu, struct isst_pkg_ctdp *pkg_dev) int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) { - int i, ret; + int i, ret, valid = 0; if (pkg_dev->processed) return 0; @@ -684,6 +695,14 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) cpu, pkg_dev->enabled, pkg_dev->current_level, pkg_dev->levels); + if (tdp_level != 0xff && tdp_level > pkg_dev->levels) { + isst_display_error_info_message(1, "Invalid level", 0, 0); + return -1; + } + + if (!pkg_dev->enabled) + isst_display_error_info_message(0, "perf-profile feature is not supported, just base-config level 0 is valid", 0, 0); + for (i = 0; i <= pkg_dev->levels; ++i) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -703,6 +722,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) if (ret) continue; + valid = 1; pkg_dev->processed = 1; ctdp_level->processed = 1; @@ -775,6 +795,9 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) isst_get_uncore_mem_freq(cpu, i, ctdp_level); } + if (!valid) + isst_display_error_info_message(0, "Invalid level, Can't get TDP control information at specified levels on cpu", 1, cpu); + return 0; } -- cgit v1.2.3 From 6c8edba37cc58f44e02f846c4ab50702b64564fd Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:24 -0800 Subject: tools/power/x86/intel-speed-select: Check feature status first Before looking for information about the base-freq or turbo-freq details, first check if the feature is supported at that level. If not print error and return. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 2f3921cd87f3..732d9a5eacf1 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -394,9 +394,19 @@ int isst_set_tdp_level(int cpu, int tdp_level) int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) { + struct isst_pkg_ctdp_level_info ctdp_level; int i, ret, core_cnt, max; unsigned int req, resp; + ret = isst_get_ctdp_control(cpu, level, &ctdp_level); + if (ret) + return ret; + + if (!ctdp_level.pbf_support) { + fprintf(stderr, "base-freq feature is not present at this level:%d\n", level); + return -1; + } + pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask); core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu)); @@ -492,6 +502,10 @@ int isst_set_pbf_fact_status(int cpu, int pbf, int enable) else req &= ~BIT(17); } else { + + if (enable && !ctdp_level.sst_cp_enabled) + fprintf(stderr, "Make sure to execute before: core-power enable\n"); + if (ctdp_level.pbf_enabled) req = BIT(17); @@ -579,9 +593,19 @@ int isst_get_fact_bucket_info(int cpu, int level, int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info) { + struct isst_pkg_ctdp_level_info ctdp_level; unsigned int resp; int ret; + ret = isst_get_ctdp_control(cpu, level, &ctdp_level); + if (ret) + return ret; + + if (!ctdp_level.fact_support) { + fprintf(stderr, "turbo-freq feature is not present at this level:%d\n", level); + return -1; + } + ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO, 0, level, &resp); -- cgit v1.2.3 From 6d1f2dc8a5d9d1675e664c11ebecbcef739bf058 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:25 -0800 Subject: tools/power/x86/intel-speed-select: Display error for invalid priority type When priority type for core-power enable command is anything more than 1 display error before change to 1, which is ordered priority. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 732d9a5eacf1..f69c009ef6f6 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -897,6 +897,9 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type) else req = req & ~BIT(1); + if (priority_type > 1) + fprintf(stderr, "Invalid priority type: Changing type to ordered\n"); + if (priority_type) req = req | BIT(2); else -- cgit v1.2.3 From 68e2f109717b55daeec0565dda219b43bebcbb1d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:26 -0800 Subject: tools/power/x86/intel-speed-select: Enhance help for core-power assoc Enhance help to specify CPU and clos by an example. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 4230a19664d3..75c8e1a933ef 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -2104,6 +2104,10 @@ static void set_clos_assoc(int arg) fprintf(stderr, "Associate a clos id to a CPU\n"); fprintf(stderr, "\tSpecify targeted clos id with [--clos|-c]\n"); + fprintf(stderr, + "\tFor example to associate clos 1 to CPU 0: issue\n"); + fprintf(stderr, + "\tintel-speed-select --cpu 0 core-power assoc --clos 1\n"); exit(0); } -- cgit v1.2.3 From 3d1a8579813eff7d1d4b2f186bd718877407cfdb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:27 -0800 Subject: tools/power/x86/intel-speed-select: Improve output of perf-profile commands Improve output of perf-profile commands: get-config-enabled get-lock-status Instead of showing 0/1, show meaningful strings. Also show error when command is failed. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 20 ++++++++++++-------- tools/power/x86/intel-speed-select/isst-display.c | 10 +++++++--- tools/power/x86/intel-speed-select/isst.h | 2 +- 3 files changed, 20 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 75c8e1a933ef..362a352e6266 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -953,6 +953,7 @@ static void isst_print_platform_information(void) exit(0); } +static char *local_str0, *local_str1; static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { @@ -962,13 +963,14 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3, fn_ptr = arg1; ret = fn_ptr(cpu, arg2); if (ret) - perror("get_tdp_*"); + isst_display_error_info_message(1, "get_tdp_* failed", 0, 0); else isst_ctdp_display_core_info(cpu, outf, arg3, - *(unsigned int *)arg4); + *(unsigned int *)arg4, + local_str0, local_str1); } -#define _get_tdp_level(desc, suffix, object, help) \ +#define _get_tdp_level(desc, suffix, object, help, str0, str1) \ static void get_tdp_##object(int arg) \ { \ struct isst_pkg_ctdp ctdp; \ @@ -979,6 +981,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3, help); \ exit(0); \ } \ + local_str0 = str0; \ + local_str1 = str1; \ isst_ctdp_display_information_start(outf); \ if (max_target_cpus) \ for_each_online_target_cpu_in_set( \ @@ -992,12 +996,12 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3, isst_ctdp_display_information_end(outf); \ } -_get_tdp_level("get-config-levels", levels, levels, "TDP levels"); -_get_tdp_level("get-config-version", levels, version, "TDP version"); -_get_tdp_level("get-config-enabled", levels, enabled, "TDP enable status"); +_get_tdp_level("get-config-levels", levels, levels, "Max TDP level", NULL, NULL); +_get_tdp_level("get-config-version", levels, version, "TDP version", NULL, NULL); +_get_tdp_level("get-config-enabled", levels, enabled, "perf-profile enable status", "disabled", "enabled"); _get_tdp_level("get-config-current_level", levels, current_level, - "Current TDP Level"); -_get_tdp_level("get-lock-status", levels, locked, "TDP lock status"); + "Current TDP Level", NULL, NULL); +_get_tdp_level("get-lock-status", levels, locked, "TDP lock status", "unlocked", "locked"); struct isst_pkg_ctdp clx_n_pkg_dev; diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 943226d1dd4e..0667b405f17f 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -289,7 +289,7 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level, } void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix, - unsigned int val) + unsigned int val, char *str0, char *str1) { char header[256]; char value[256]; @@ -301,8 +301,12 @@ void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix, format_and_print(outf, 2, header, NULL); snprintf(header, sizeof(header), "cpu-%d", cpu); format_and_print(outf, 3, header, NULL); - - snprintf(value, sizeof(value), "%u", val); + if (str0 && !val) + snprintf(value, sizeof(value), "%s", str0); + else if (str1 && val) + snprintf(value, sizeof(value), "%s", str1); + else + snprintf(value, sizeof(value), "%u", val); format_and_print(outf, 4, prefix, value); format_and_print(outf, 1, NULL, NULL); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 4950d2368ff8..69fa2c3c70c3 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -208,7 +208,7 @@ extern void isst_get_process_ctdp_complete(int cpu, extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, struct isst_pkg_ctdp *pkg_dev); extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix, - unsigned int val); + unsigned int val, char *str0, char *str1); extern void isst_ctdp_display_information_start(FILE *outf); extern void isst_ctdp_display_information_end(FILE *outf); extern void isst_pbf_display_information(int cpu, FILE *outf, int level, -- cgit v1.2.3 From 39bae0fce48f6431ceb3ab9cff91cb749c736c48 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:28 -0800 Subject: tools/power/x86/intel-speed-select: Improve error display for base-freq feature This change adds improved error display and handling for commands related to base-freq feature. The changes include: - Replace perror/fprintf with helpful error message - Error for not specifying TDP level when required - For CLX show help which shows limitation Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 25 ++++++++++++++++++++---- tools/power/x86/intel-speed-select/isst-core.c | 14 ++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 362a352e6266..95602f98a138 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1276,7 +1276,7 @@ static void clx_n_dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2, ret = clx_n_config(cpu); if (ret) { - perror("isst_get_process_ctdp"); + isst_display_error_info_message(1, "clx_n_config failed", 0, 0); } else { struct isst_pkg_ctdp_level_info *ctdp_level; struct isst_pbf_info *pbf_info; @@ -1297,7 +1297,9 @@ static void dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_get_pbf_info(cpu, tdp_level, &pbf_info); if (ret) { - perror("isst_get_pbf_info"); + isst_display_error_info_message(1, "Failed to get base-freq info at this level", 1, tdp_level); + isst_ctdp_display_information_end(outf); + exit(1); } else { isst_pbf_display_information(cpu, outf, tdp_level, &pbf_info); isst_get_pbf_info_complete(&pbf_info); @@ -1317,7 +1319,7 @@ static void dump_pbf_config(int arg) } if (tdp_level == 0xff) { - fprintf(outf, "Invalid command: specify tdp_level\n"); + isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0); exit(1); } @@ -1635,7 +1637,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_set_pbf_fact_status(cpu, 1, status); if (ret) { - perror("isst_set_pbf"); + debug_printf("isst_set_pbf_fact_status failed"); if (auto_mode) isst_pm_qos_config(cpu, 0, 0); } else { @@ -1667,10 +1669,25 @@ static void set_pbf_enable(int arg) if (enable) { fprintf(stderr, "Enable Intel Speed Select Technology base frequency feature\n"); + if (is_clx_n_platform()) { + fprintf(stderr, + "\tOn this platform this command doesn't enable feature in the hardware.\n"); + fprintf(stderr, + "\tIt updates the cpufreq scaling_min_freq to match cpufreq base_frequency.\n"); + exit(0); + + } fprintf(stderr, "\tOptional Arguments: -a|--auto : Use priority of cores to set core-power associations\n"); } else { + if (is_clx_n_platform()) { + fprintf(stderr, + "\tOn this platform this command doesn't disable feature in the hardware.\n"); + fprintf(stderr, + "\tIt updates the cpufreq scaling_min_freq to match cpuinfo_min_freq\n"); + exit(0); + } fprintf(stderr, "Disable Intel Speed Select Technology base frequency feature\n"); fprintf(stderr, diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index f69c009ef6f6..7836f9f08af1 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -395,15 +395,27 @@ int isst_set_tdp_level(int cpu, int tdp_level) int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) { struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; int i, ret, core_cnt, max; unsigned int req, resp; + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + return ret; + } + + if (level > pkg_dev.levels) { + isst_display_error_info_message(1, "Invalid level", 1, level); + return -1; + } + ret = isst_get_ctdp_control(cpu, level, &ctdp_level); if (ret) return ret; if (!ctdp_level.pbf_support) { - fprintf(stderr, "base-freq feature is not present at this level:%d\n", level); + isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, level); return -1; } -- cgit v1.2.3 From a9fd6ae739ef9624f9017df08edf0970c7d170e2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:29 -0800 Subject: tools/power/x86/intel-speed-select: Improve error display for turbo-freq feature This change adds improved error display and handling for commands related to turbo-freq feature. The changes include: - Replace perror/fprintf with helpful error message - Error for not specifying TDP level when required - Show error for invalid bucket number - Show message to enable core-power before enabling turbo-freq feature Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 15 +++++---- tools/power/x86/intel-speed-select/isst-core.c | 41 +++++++++++++++++++---- tools/power/x86/intel-speed-select/isst-display.c | 16 ++++++++- tools/power/x86/intel-speed-select/isst.h | 2 +- 4 files changed, 60 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 95602f98a138..48915470c572 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1712,12 +1712,15 @@ static void dump_fact_config_for_cpu(int cpu, void *arg1, void *arg2, struct isst_fact_info fact_info; int ret; - ret = isst_get_fact_info(cpu, tdp_level, &fact_info); - if (ret) - perror("isst_get_fact_bucket_info"); - else + ret = isst_get_fact_info(cpu, tdp_level, fact_bucket, &fact_info); + if (ret) { + isst_display_error_info_message(1, "Failed to get turbo-freq info at this level", 1, tdp_level); + isst_ctdp_display_information_end(outf); + exit(1); + } else { isst_fact_display_information(cpu, outf, tdp_level, fact_bucket, fact_avx, &fact_info); + } } static void dump_fact_config(int arg) @@ -1735,7 +1738,7 @@ static void dump_fact_config(int arg) } if (tdp_level == 0xff) { - fprintf(outf, "Invalid command: specify tdp_level\n"); + isst_display_error_info_message(1, "Invalid command: specify tdp_level\n", 0, 0); exit(1); } @@ -1763,7 +1766,7 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_set_pbf_fact_status(cpu, 0, status); if (ret) { - perror("isst_set_fact"); + debug_printf("isst_set_pbf_fact_status failed"); if (auto_mode) isst_pm_qos_config(cpu, 0, 0); diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 7836f9f08af1..89b3068a2685 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -516,7 +516,7 @@ int isst_set_pbf_fact_status(int cpu, int pbf, int enable) } else { if (enable && !ctdp_level.sst_cp_enabled) - fprintf(stderr, "Make sure to execute before: core-power enable\n"); + isst_display_error_info_message(0, "Make sure to execute before: core-power enable", 0, 0); if (ctdp_level.pbf_enabled) req = BIT(17); @@ -603,18 +603,30 @@ int isst_get_fact_bucket_info(int cpu, int level, return 0; } -int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info) +int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_info *fact_info) { struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; unsigned int resp; - int ret; + int j, ret, print; + + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + return ret; + } + + if (level > pkg_dev.levels) { + isst_display_error_info_message(1, "Invalid level", 1, level); + return -1; + } ret = isst_get_ctdp_control(cpu, level, &ctdp_level); if (ret) return ret; if (!ctdp_level.fact_support) { - fprintf(stderr, "turbo-freq feature is not present at this level:%d\n", level); + isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, level); return -1; } @@ -632,8 +644,25 @@ int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info) fact_info->lp_clipping_ratio_license_avx512 = (resp >> 16) & 0xff; ret = isst_get_fact_bucket_info(cpu, level, fact_info->bucket_info); + if (ret) + return ret; - return ret; + print = 0; + for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { + if (fact_bucket != 0xff && fact_bucket != j) + continue; + + if (!fact_info->bucket_info[j].high_priority_cores_count) + break; + + print = 1; + } + if (!print) { + isst_display_error_info_message(1, "Invalid bucket", 0, 0); + return -1; + } + + return 0; } int isst_set_trl(int cpu, unsigned long long trl) @@ -769,7 +798,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) } if (ctdp_level->fact_support) { - ret = isst_get_fact_info(cpu, i, + ret = isst_get_fact_info(cpu, i, 0xff, &ctdp_level->fact_info); if (ret) return ret; diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 0667b405f17f..bf9422336fab 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -222,7 +222,21 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level, struct isst_fact_bucket_info *bucket_info = fact_info->bucket_info; char header[256]; char value[256]; - int j; + int print = 0, j; + + for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { + if (fact_bucket != 0xff && fact_bucket != j) + continue; + + if (!bucket_info[j].high_priority_cores_count) + break; + + print = 1; + } + if (!print) { + fprintf(stderr, "Invalid bucket\n"); + return; + } snprintf(header, sizeof(header), "speed-select-turbo-freq-properties"); format_and_print(outf, base_level, header, NULL); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 69fa2c3c70c3..2e1afd856a78 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -219,7 +219,7 @@ extern int isst_set_pbf_fact_status(int cpu, int pbf, int enable); extern int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info); extern void isst_get_pbf_info_complete(struct isst_pbf_info *pbf_info); -extern int isst_get_fact_info(int cpu, int level, +extern int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_info *fact_info); extern int isst_get_fact_bucket_info(int cpu, int level, struct isst_fact_bucket_info *bucket_info); -- cgit v1.2.3 From 95f8e5694580cbf80599c6a5874cf4fba8b17141 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:30 -0800 Subject: tools/power/x86/intel-speed-select: Kernel interface error handling Treat a case when mailbox/mmio command can't be handled by the kernel drivers when the module is removed or send a command which no driver can handle. In this case ENOTTY result is returned, so print error. Also when the isst_if_mmio module is removed, we can't send CLOS message messages via Mailbox on non SKX based platforms. When this module is removed, isst_platform_info.mmio_supported is set to 0. So it can't be used as a condition to send via mailbox. Here replace check for Skylake based platform to send via mailbox, other platforms can't use mailbox in lieu of MMIO. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 48915470c572..2ab902c18bcc 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -95,6 +95,14 @@ int is_clx_n_platform(void) return 0; } +int is_skx_based_platform(void) +{ + if (cpu_model == 0x55) + return 1; + + return 0; +} + static int update_cpu_model(void) { unsigned int ebx, ecx, edx; @@ -695,7 +703,11 @@ static int isst_send_mmio_command(unsigned int cpu, unsigned int reg, int write, } if (ioctl(fd, cmd, &io_regs) == -1) { - perror("ISST_IF_IO_CMD"); + if (errno == ENOTTY) { + perror("ISST_IF_IO_COMMAND\n"); + fprintf(stderr, "Check presence of kernel modules: isst_if_mmio\n"); + exit(0); + } fprintf(outf, "Error: mmio_cmd cpu:%d reg:%x read_write:%x\n", cpu, reg, write); } else { @@ -724,7 +736,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command, "mbox_send: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n", cpu, command, sub_command, parameter, req_data); - if (isst_platform_info.mmio_supported && command == CONFIG_CLOS && + if (!is_skx_based_platform() && command == CONFIG_CLOS && sub_command != CLOS_PM_QOS_CONFIG) { unsigned int value; int write = 0; @@ -774,10 +786,14 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command, err(-1, "%s open failed", pathname); if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) { - perror("ISST_IF_MBOX_COMMAND"); - fprintf(outf, - "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n", - cpu, command, sub_command, parameter, req_data); + if (errno == ENOTTY) { + perror("ISST_IF_MBOX_COMMAND\n"); + fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n"); + exit(0); + } + debug_printf( + "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n", + cpu, command, sub_command, parameter, req_data, errno); return -1; } else { *resp = mbox_cmds.mbox_cmd[0].resp_data; -- cgit v1.2.3 From fe6fb2165ade451d7a354787a8e4f00fa5bd3e44 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:31 -0800 Subject: tools/power/x86/intel-speed-select: Improve core-power result and error display This change adds improved error display and handling for commands related to core-power feature. The changes include: - Replace perror with helpful error message - Use ordered priority for SKX based platform by default as the proportional priority is not supported - Don't show weight and epp in help and also give error when user tries to set them in SKX based platforms - Range check for epp and weights and display error Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 87 ++++++++++++++--------- tools/power/x86/intel-speed-select/isst-core.c | 10 +-- tools/power/x86/intel-speed-select/isst-display.c | 5 +- 3 files changed, 62 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 2ab902c18bcc..7a8a315ac8d1 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1361,7 +1361,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max) ret = isst_pm_get_clos(cpu, clos, &clos_config); if (ret) { - perror("isst_pm_get_clos"); + isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0); return ret; } clos_config.clos_min = min; @@ -1370,7 +1370,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max) clos_config.clos_prop_prio = wt; ret = isst_set_clos(cpu, clos, &clos_config); if (ret) { - perror("isst_pm_set_clos"); + isst_display_error_info_message(1, "isst_set_clos failed", 0, 0); return ret; } @@ -1577,7 +1577,7 @@ static int set_core_priority_and_min(int cpu, int mask_size, debug_printf("Associate cpu: %d clos: %d\n", i, clos); ret = isst_clos_associate(i, clos); if (ret) { - perror("isst_clos_associate"); + isst_display_error_info_message(1, "isst_clos_associate failed", 0, 0); return ret; } } @@ -1593,14 +1593,14 @@ static int set_pbf_core_power(int cpu) ret = isst_get_ctdp_levels(cpu, &pkg_dev); if (ret) { - perror("isst_get_ctdp_levels"); + debug_printf("isst_get_ctdp_levels failed"); return ret; } debug_printf("Current_level: %d\n", pkg_dev.current_level); ret = isst_get_pbf_info(cpu, pkg_dev.current_level, &pbf_info); if (ret) { - perror("isst_get_pbf_info"); + debug_printf("isst_get_pbf_info failed"); return ret; } debug_printf("p1_high: %d p1_low: %d\n", pbf_info.p1_high, @@ -1610,13 +1610,13 @@ static int set_pbf_core_power(int cpu) pbf_info.core_cpumask, pbf_info.p1_high, pbf_info.p1_low); if (ret) { - perror("set_core_priority_and_min"); + debug_printf("set_core_priority_and_min failed"); return ret; } ret = isst_pm_qos_config(cpu, 1, 1); if (ret) { - perror("isst_pm_qos_config"); + debug_printf("isst_pm_qos_config failed"); return ret; } @@ -1666,7 +1666,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, } if (auto_mode && !status) - isst_pm_qos_config(cpu, 0, 0); + isst_pm_qos_config(cpu, 0, 1); disp_result: if (status) @@ -1933,9 +1933,12 @@ static void enable_clos_qos_config(int cpu, void *arg1, void *arg2, void *arg3, int ret; int status = *(int *)arg4; + if (is_skx_based_platform()) + clos_priority_type = 1; + ret = isst_pm_qos_config(cpu, status, clos_priority_type); if (ret) - perror("isst_pm_qos_config"); + isst_display_error_info_message(1, "isst_pm_qos_config failed", 0, 0); if (status) isst_display_result(cpu, outf, "core-power", "enable", @@ -1953,9 +1956,11 @@ static void set_clos_enable(int arg) if (enable) { fprintf(stderr, "Enable core-power for a package/die\n"); - fprintf(stderr, - "\tClos Enable: Specify priority type with [--priority|-p]\n"); - fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n"); + if (!is_skx_based_platform()) { + fprintf(stderr, + "\tClos Enable: Specify priority type with [--priority|-p]\n"); + fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n"); + } } else { fprintf(stderr, "Disable core-power: [No command arguments are required]\n"); @@ -1986,7 +1991,7 @@ static void dump_clos_config_for_cpu(int cpu, void *arg1, void *arg2, ret = isst_pm_get_clos(cpu, current_clos, &clos_config); if (ret) - perror("isst_pm_get_clos"); + isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0); else isst_clos_display_information(cpu, outf, current_clos, &clos_config); @@ -2002,7 +2007,8 @@ static void dump_clos_config(int arg) exit(0); } if (current_clos < 0 || current_clos > 3) { - fprintf(stderr, "Invalid clos id\n"); + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); + isst_ctdp_display_information_end(outf); exit(0); } @@ -2023,7 +2029,7 @@ static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_clos_get_clos_information(cpu, &enable, &prio_type); if (ret) - perror("isst_clos_get_info"); + isst_display_error_info_message(1, "isst_clos_get_info failed", 0, 0); else { int cp_state, cp_cap; @@ -2069,7 +2075,7 @@ static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, clos_config.clos_desired = clos_desired; ret = isst_set_clos(cpu, current_clos, &clos_config); if (ret) - perror("isst_set_clos"); + isst_display_error_info_message(1, "isst_set_clos failed", 0, 0); else isst_display_result(cpu, outf, "core-power", "config", ret); } @@ -2081,26 +2087,27 @@ static void set_clos_config(int arg) "Set core-power configuration for one of the four clos ids\n"); fprintf(stderr, "\tSpecify targeted clos id with [--clos|-c]\n"); - fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n"); - fprintf(stderr, - "\tSpecify clos Proportional Priority [--weight|-w]\n"); + if (!is_skx_based_platform()) { + fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n"); + fprintf(stderr, + "\tSpecify clos Proportional Priority [--weight|-w]\n"); + } fprintf(stderr, "\tSpecify clos min in MHz with [--min|-n]\n"); fprintf(stderr, "\tSpecify clos max in MHz with [--max|-m]\n"); - fprintf(stderr, "\tSpecify clos desired in MHz with [--desired|-d]\n"); exit(0); } if (current_clos < 0 || current_clos > 3) { - fprintf(stderr, "Invalid clos id\n"); + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); exit(0); } - if (clos_epp < 0 || clos_epp > 0x0F) { - fprintf(stderr, "clos epp is not specified, default: 0\n"); + if (!is_skx_based_platform() && (clos_epp < 0 || clos_epp > 0x0F)) { + fprintf(stderr, "clos epp is not specified or invalid, default: 0\n"); clos_epp = 0; } - if (clos_prop_prio < 0 || clos_prop_prio > 0x0F) { + if (!is_skx_based_platform() && (clos_prop_prio < 0 || clos_prop_prio > 0x0F)) { fprintf(stderr, - "clos frequency weight is not specified, default: 0\n"); + "clos frequency weight is not specified or invalid, default: 0\n"); clos_prop_prio = 0; } if (clos_min < 0) { @@ -2108,11 +2115,11 @@ static void set_clos_config(int arg) clos_min = 0; } if (clos_max < 0) { - fprintf(stderr, "clos max is not specified, default: 25500 MHz\n"); + fprintf(stderr, "clos max is not specified, default: Max frequency (ratio 0xff)\n"); clos_max = 0xff; } - if (clos_desired < 0) { - fprintf(stderr, "clos desired is not specified, default: 0\n"); + if (clos_desired) { + fprintf(stderr, "clos desired is not supported on this platform\n"); clos_desired = 0x00; } @@ -2133,7 +2140,7 @@ static void set_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_clos_associate(cpu, current_clos); if (ret) - perror("isst_clos_associate"); + debug_printf("isst_clos_associate failed"); else isst_display_result(cpu, outf, "core-power", "assoc", ret); } @@ -2152,15 +2159,14 @@ static void set_clos_assoc(int arg) } if (current_clos < 0 || current_clos > 3) { - fprintf(stderr, "Invalid clos id\n"); + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); exit(0); } if (max_target_cpus) for_each_online_target_cpu_in_set(set_clos_assoc_for_cpu, NULL, NULL, NULL, NULL); else { - fprintf(stderr, - "Invalid target cpu. Specify with [-c|--cpu]\n"); + isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0); } } @@ -2171,7 +2177,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, ret = isst_clos_get_assoc_status(cpu, &clos); if (ret) - perror("isst_clos_get_assoc_status"); + isst_display_error_info_message(1, "isst_clos_get_assoc_status failed", 0, 0); else isst_clos_display_assoc_information(cpu, outf, clos); } @@ -2185,8 +2191,7 @@ static void get_clos_assoc(int arg) } if (!max_target_cpus) { - fprintf(stderr, - "Invalid target cpu. Specify with [-c|--cpu]\n"); + isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0); exit(0); } @@ -2366,6 +2371,10 @@ static void parse_cmd_args(int argc, int start, char **argv) break; case 'e': clos_epp = atoi(optarg); + if (is_skx_based_platform()) { + isst_display_error_info_message(1, "epp can't be specified on this platform", 0, 0); + exit(0); + } break; case 'n': clos_min = atoi(optarg); @@ -2377,9 +2386,17 @@ static void parse_cmd_args(int argc, int start, char **argv) break; case 'p': clos_priority_type = atoi(optarg); + if (is_skx_based_platform() && !clos_priority_type) { + isst_display_error_info_message(1, "Invalid clos priority type: proportional for this platform", 0, 0); + exit(0); + } break; case 'w': clos_prop_prio = atoi(optarg); + if (is_skx_based_platform()) { + isst_display_error_info_message(1, "weight can't be specified on this platform", 0, 0); + exit(0); + } break; default: printf("no match\n"); diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 89b3068a2685..67c9b1139631 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -917,17 +917,19 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type) } ret = isst_write_pm_config(cpu, 0); if (ret) - perror("isst_write_pm_config\n"); + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0); } else { ret = isst_write_pm_config(cpu, 1); if (ret) - perror("isst_write_pm_config\n"); + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0); } ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0, &resp); - if (ret) + if (ret) { + isst_display_error_info_message(1, "CLOS_PM_QOS_CONFIG command failed", 0, 0); return ret; + } debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp); @@ -939,7 +941,7 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type) req = req & ~BIT(1); if (priority_type > 1) - fprintf(stderr, "Invalid priority type: Changing type to ordered\n"); + isst_display_error_info_message(1, "Invalid priority type: Changing type to ordered", 0, 0); if (priority_type) req = req | BIT(2); diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index bf9422336fab..ed51d8b79c41 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -599,7 +599,10 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos, format_and_print(outf, 5, header, value); snprintf(header, sizeof(header), "clos-max"); - snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER); + if (clos_config->clos_max == 0xff) + snprintf(value, sizeof(value), "Max Turbo frequency"); + else + snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER); format_and_print(outf, 5, header, value); snprintf(header, sizeof(header), "clos-desired"); -- cgit v1.2.3 From 070fdea13d4ba3ba6ae5a239fe334947921a1b9f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:32 -0800 Subject: tools/power/x86/intel-speed-select: Show error for invalid CPUs in the options When --cpu or -c is used to specify target CPUs and non of them are valid, display error. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 7a8a315ac8d1..386791aaeca1 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -425,7 +425,7 @@ static void for_each_online_target_cpu_in_set( void (*callback)(int, void *, void *, void *, void *), void *arg1, void *arg2, void *arg3, void *arg4) { - int i; + int i, found = 0; for (i = 0; i < topo_max_cpus; ++i) { int online; @@ -439,9 +439,14 @@ static void for_each_online_target_cpu_in_set( online = 1; /* online entry for CPU 0 needs some special configs */ - if (online && callback) + if (online && callback) { callback(i, arg1, arg2, arg3, arg4); + found = 1; + } } + + if (!found) + fprintf(stderr, "No valid CPU in the list\n"); } #define BITMASK_SIZE 32 -- cgit v1.2.3 From 7fc9fefd994b3b0c6c4dc204a993f1a4c451f4d4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:33 -0800 Subject: tools/power/x86/intel-speed-select: Improve CLX commands CLX doesn't have capability to change the feature in the hardware, but this acts as "--auto | -a" option. So even if user didn't specify the option, use this as --auto | -a to set cpufreq scaling frequency limits. Also remove perror with debug_printf as they don't bring any value. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 386791aaeca1..a44ec2f76f52 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1162,7 +1162,7 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2, ret = clx_n_config(cpu); if (ret) { - perror("isst_get_process_ctdp"); + debug_printf("clx_n_config failed"); } else { struct isst_pkg_ctdp_level_info *ctdp_level; struct isst_pbf_info *pbf_info; @@ -1419,7 +1419,7 @@ static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) ret = clx_n_config(cpu); if (ret) { - perror("set_clx_pbf_cpufreq_scaling_min_max"); + debug_printf("cpufreq_scaling_min_max failed for CLX"); return ret; } @@ -1635,17 +1635,13 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int status = *(int *)arg4; if (is_clx_n_platform()) { + ret = 0; if (status) { - ret = 0; - if (auto_mode) - set_clx_pbf_cpufreq_scaling_min_max(cpu); + set_clx_pbf_cpufreq_scaling_min_max(cpu); } else { - ret = -1; - if (auto_mode) { - set_scaling_max_to_cpuinfo_max(cpu); - set_scaling_min_to_cpuinfo_min(cpu); - } + set_scaling_max_to_cpuinfo_max(cpu); + set_scaling_min_to_cpuinfo_min(cpu); } goto disp_result; } -- cgit v1.2.3 From b86639e1957fe09362a1681b8a061aafacd1b486 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:34 -0800 Subject: tools/power/x86/intel-speed-select: Fix avx options for turbo-freq feature Specifying "avx2" and "avx512" option for display filter doesn't work with short option "-r", only works with --try-type. Also compare full 6 characters for "avx512" string. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index a44ec2f76f52..2a81fb881f04 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -2329,7 +2329,7 @@ static void parse_cmd_args(int argc, int start, char **argv) option_index = start; optind = start + 1; - while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:hoa", + while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:r:hoa", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -2355,7 +2355,7 @@ static void parse_cmd_args(int argc, int start, char **argv) fact_avx = 0x01; } else if (!strncmp(optarg, "avx2", 4)) { fact_avx = 0x02; - } else if (!strncmp(optarg, "avx512", 4)) { + } else if (!strncmp(optarg, "avx512", 6)) { fact_avx = 0x04; } else { fprintf(outf, "Invalid sse,avx options\n"); -- cgit v1.2.3 From 4a9603534aff2bc6e36f36144eb25731a888e835 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:35 -0800 Subject: tools/power/x86/intel-speed-select: Print friendly warning for bad command line When user specifies invalid option, display "Unknown Option: ignore", instead of "no match". Also display error for garbage on the command line. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 2a81fb881f04..924cb871d6d2 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -2400,9 +2400,12 @@ static void parse_cmd_args(int argc, int start, char **argv) } break; default: - printf("no match\n"); + printf("Unknown option: ignore\n"); } } + + if (argv[optind]) + printf("Garbage at the end of command: ignore\n"); } static void isst_help(void) -- cgit v1.2.3 From e44d76569b19c03c786832d67782beb3c65e16ca Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:36 -0800 Subject: tools/power/x86/intel-speed-select: Add display for enabled cpus count In addition to total CPU count also display "enabled-cpu-count" for perf-profile info command. This will show number of CPUs in the "enable-cpu-mask". For example: perf-profile-level-4 cpu-count:32 enable-cpu-count:16 enable-cpu-mask:e42d0000,e42d0000 Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-display.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index ed51d8b79c41..4f4c42147a90 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -354,6 +354,14 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, snprintf(value, sizeof(value), "%d", j); format_and_print(outf, base_level + 4, header, value); + j = CPU_COUNT_S(ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask); + if (j) { + snprintf(header, sizeof(header), "enable-cpu-count"); + snprintf(value, sizeof(value), "%d", j); + format_and_print(outf, base_level + 4, header, value); + } + if (ctdp_level->core_cpumask_size) { snprintf(header, sizeof(header), "enable-cpu-mask"); printcpumask(sizeof(value), value, -- cgit v1.2.3 From 74062363f8556fb15834caf4be4212da065e6712 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:37 -0800 Subject: tools/power/x86/intel-speed-select: Avoid duplicate Package strings for json For platforms where multiple packages/die, this makes "Package-" key duplicate. To make unique, add die and cpu id to key name. So "Package-0" key name will change to "Package-0-die-x:cpu-x". For example: $sudo ./intel-speed-select -f json perf-profile info Intel(R) Speed Select Technology Executing on CPU model:106[0x6a] { "package-0:die-0:cpu-0": { "perf-profile-level-0": { "cpu-count": "32", "enable-cpu-count": "32", ... ... "package-1:die-0:cpu-16": { "perf-profile-level-0": { "cpu-count": "32", "enable-cpu-count": "32", "enable-cpu-mask": "ffff0000,ffff0000", ... ... For non json format, there is no change. Here when print_package_info() is called, it will return the level to print for other information. This level is used formatting. Also in some function duplicate code was there to print package,die and CPU information. Replace all that code with a call to print_package_info(). Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-display.c | 172 +++++++++++----------- 1 file changed, 83 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 4f4c42147a90..51dbaa5f02ec 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -158,10 +158,17 @@ static void format_and_print(FILE *outf, int level, char *header, char *value) last_level = level; } -static void print_package_info(int cpu, FILE *outf) +static int print_package_info(int cpu, FILE *outf) { char header[256]; + if (out_format_is_json()) { + snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d", + get_physical_package_id(cpu), get_physical_die_id(cpu), + cpu); + format_and_print(outf, 1, header, NULL); + return 1; + } snprintf(header, sizeof(header), "package-%d", get_physical_package_id(cpu)); format_and_print(outf, 1, header, NULL); @@ -169,6 +176,8 @@ static void print_package_info(int cpu, FILE *outf) format_and_print(outf, 2, header, NULL); snprintf(header, sizeof(header), "cpu-%d", cpu); format_and_print(outf, 3, header, NULL); + + return 3; } static void _isst_pbf_display_information(int cpu, FILE *outf, int level, @@ -331,10 +340,11 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, { char header[256]; char value[256]; - int i, base_level = 1; + static int level; + int i; if (pkg_dev->processed) - print_package_info(cpu, outf); + level = print_package_info(cpu, outf); for (i = 0; i <= pkg_dev->levels; ++i) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -346,20 +356,20 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, snprintf(header, sizeof(header), "perf-profile-level-%d", ctdp_level->level); - format_and_print(outf, base_level + 3, header, NULL); + format_and_print(outf, level + 1, header, NULL); snprintf(header, sizeof(header), "cpu-count"); j = get_cpu_count(get_physical_die_id(cpu), get_physical_die_id(cpu)); snprintf(value, sizeof(value), "%d", j); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); j = CPU_COUNT_S(ctdp_level->core_cpumask_size, ctdp_level->core_cpumask); if (j) { snprintf(header, sizeof(header), "enable-cpu-count"); snprintf(value, sizeof(value), "%d", j); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->core_cpumask_size) { @@ -367,59 +377,59 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, printcpumask(sizeof(value), value, ctdp_level->core_cpumask_size, ctdp_level->core_cpumask); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "enable-cpu-list"); printcpulist(sizeof(value), value, ctdp_level->core_cpumask_size, ctdp_level->core_cpumask); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } snprintf(header, sizeof(header), "thermal-design-power-ratio"); snprintf(value, sizeof(value), "%d", ctdp_level->tdp_ratio); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "base-frequency(MHz)"); if (!ctdp_level->sse_p1) ctdp_level->sse_p1 = ctdp_level->tdp_ratio; snprintf(value, sizeof(value), "%d", ctdp_level->sse_p1 * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); if (ctdp_level->avx2_p1) { snprintf(header, sizeof(header), "base-frequency-avx2(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->avx2_p1 * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->avx512_p1) { snprintf(header, sizeof(header), "base-frequency-avx512(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->avx512_p1 * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->uncore_p1) { snprintf(header, sizeof(header), "uncore-frequency-min(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->uncore_p1 * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->uncore_p0) { snprintf(header, sizeof(header), "uncore-frequency-max(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->uncore_p0 * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->mem_freq) { snprintf(header, sizeof(header), "mem-frequency(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } snprintf(header, sizeof(header), @@ -431,7 +441,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, snprintf(value, sizeof(value), "disabled"); } else snprintf(value, sizeof(value), "unsupported"); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "speed-select-base-freq"); @@ -442,7 +452,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, snprintf(value, sizeof(value), "disabled"); } else snprintf(value, sizeof(value), "unsupported"); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "speed-select-core-power"); @@ -453,89 +463,89 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, snprintf(value, sizeof(value), "disabled"); } else snprintf(value, sizeof(value), "unsupported"); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); if (is_clx_n_platform()) { if (ctdp_level->pbf_support) _isst_pbf_display_information(cpu, outf, tdp_level, &ctdp_level->pbf_info, - base_level + 4); + level + 1); continue; } if (ctdp_level->pkg_tdp) { snprintf(header, sizeof(header), "thermal-design-power(W)"); snprintf(value, sizeof(value), "%d", ctdp_level->pkg_tdp); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } if (ctdp_level->t_proc_hot) { snprintf(header, sizeof(header), "tjunction-max(C)"); snprintf(value, sizeof(value), "%d", ctdp_level->t_proc_hot); - format_and_print(outf, base_level + 4, header, value); + format_and_print(outf, level + 2, header, value); } snprintf(header, sizeof(header), "turbo-ratio-limits-sse"); - format_and_print(outf, base_level + 4, header, NULL); + format_and_print(outf, level + 2, header, NULL); for (j = 0; j < 8; ++j) { snprintf(header, sizeof(header), "bucket-%d", j); - format_and_print(outf, base_level + 5, header, NULL); + format_and_print(outf, level + 3, header, NULL); snprintf(header, sizeof(header), "core-count"); snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); snprintf(header, sizeof(header), "max-turbo-frequency(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->trl_sse_active_cores[j] * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); } if (ctdp_level->trl_avx_active_cores[0]) { snprintf(header, sizeof(header), "turbo-ratio-limits-avx2"); - format_and_print(outf, base_level + 4, header, NULL); + format_and_print(outf, level + 2, header, NULL); for (j = 0; j < 8; ++j) { snprintf(header, sizeof(header), "bucket-%d", j); - format_and_print(outf, base_level + 5, header, NULL); + format_and_print(outf, level + 3, header, NULL); snprintf(header, sizeof(header), "core-count"); snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); snprintf(header, sizeof(header), "max-turbo-frequency(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_active_cores[j] * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); } } if (ctdp_level->trl_avx_512_active_cores[0]) { snprintf(header, sizeof(header), "turbo-ratio-limits-avx512"); - format_and_print(outf, base_level + 4, header, NULL); + format_and_print(outf, level + 2, header, NULL); for (j = 0; j < 8; ++j) { snprintf(header, sizeof(header), "bucket-%d", j); - format_and_print(outf, base_level + 5, header, NULL); + format_and_print(outf, level + 3, header, NULL); snprintf(header, sizeof(header), "core-count"); snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); snprintf(header, sizeof(header), "max-turbo-frequency(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_512_active_cores[j] * DISP_FREQ_MULTIPLIER); - format_and_print(outf, base_level + 6, header, value); + format_and_print(outf, level + 4, header, value); } } if (ctdp_level->pbf_support) _isst_pbf_display_information(cpu, outf, i, &ctdp_level->pbf_info, - base_level + 4); + level + 2); if (ctdp_level->fact_support) _isst_fact_display_information(cpu, outf, i, 0xff, 0xff, &ctdp_level->fact_info, - base_level + 4); + level + 2); } format_and_print(outf, 1, NULL, NULL); @@ -558,8 +568,10 @@ void isst_ctdp_display_information_end(FILE *outf) void isst_pbf_display_information(int cpu, FILE *outf, int level, struct isst_pbf_info *pbf_info) { - print_package_info(cpu, outf); - _isst_pbf_display_information(cpu, outf, level, pbf_info, 4); + int _level; + + _level = print_package_info(cpu, outf); + _isst_pbf_display_information(cpu, outf, level, pbf_info, _level + 1); format_and_print(outf, 1, NULL, NULL); } @@ -567,9 +579,11 @@ void isst_fact_display_information(int cpu, FILE *outf, int level, int fact_bucket, int fact_avx, struct isst_fact_info *fact_info) { - print_package_info(cpu, outf); + int _level; + + _level = print_package_info(cpu, outf); _isst_fact_display_information(cpu, outf, level, fact_bucket, fact_avx, - fact_info, 4); + fact_info, _level + 1); format_and_print(outf, 1, NULL, NULL); } @@ -578,46 +592,41 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos, { char header[256]; char value[256]; + int level; - snprintf(header, sizeof(header), "package-%d", - get_physical_package_id(cpu)); - format_and_print(outf, 1, header, NULL); - snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu)); - format_and_print(outf, 2, header, NULL); - snprintf(header, sizeof(header), "cpu-%d", cpu); - format_and_print(outf, 3, header, NULL); + level = print_package_info(cpu, outf); snprintf(header, sizeof(header), "core-power"); - format_and_print(outf, 4, header, NULL); + format_and_print(outf, level + 1, header, NULL); snprintf(header, sizeof(header), "clos"); snprintf(value, sizeof(value), "%d", clos); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "epp"); snprintf(value, sizeof(value), "%d", clos_config->epp); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "clos-proportional-priority"); snprintf(value, sizeof(value), "%d", clos_config->clos_prop_prio); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "clos-min"); snprintf(value, sizeof(value), "%d MHz", clos_config->clos_min * DISP_FREQ_MULTIPLIER); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "clos-max"); if (clos_config->clos_max == 0xff) snprintf(value, sizeof(value), "Max Turbo frequency"); else snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "clos-desired"); snprintf(value, sizeof(value), "%d MHz", clos_config->clos_desired * DISP_FREQ_MULTIPLIER); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); - format_and_print(outf, 1, NULL, NULL); + format_and_print(outf, level, NULL, NULL); } void isst_clos_display_clos_information(int cpu, FILE *outf, @@ -626,70 +635,60 @@ void isst_clos_display_clos_information(int cpu, FILE *outf, { char header[256]; char value[256]; + int level; - snprintf(header, sizeof(header), "package-%d", - get_physical_package_id(cpu)); - format_and_print(outf, 1, header, NULL); - snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu)); - format_and_print(outf, 2, header, NULL); - snprintf(header, sizeof(header), "cpu-%d", cpu); - format_and_print(outf, 3, header, NULL); + level = print_package_info(cpu, outf); snprintf(header, sizeof(header), "core-power"); - format_and_print(outf, 4, header, NULL); + format_and_print(outf, level + 1, header, NULL); snprintf(header, sizeof(header), "support-status"); if (cap) snprintf(value, sizeof(value), "supported"); else snprintf(value, sizeof(value), "unsupported"); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "enable-status"); if (state) snprintf(value, sizeof(value), "enabled"); else snprintf(value, sizeof(value), "disabled"); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "clos-enable-status"); if (clos_enable) snprintf(value, sizeof(value), "enabled"); else snprintf(value, sizeof(value), "disabled"); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); snprintf(header, sizeof(header), "priority-type"); if (type) snprintf(value, sizeof(value), "ordered"); else snprintf(value, sizeof(value), "proportional"); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); - format_and_print(outf, 1, NULL, NULL); + format_and_print(outf, level, NULL, NULL); } void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos) { char header[256]; char value[256]; + int level; - snprintf(header, sizeof(header), "package-%d", - get_physical_package_id(cpu)); - format_and_print(outf, 1, header, NULL); - snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu)); - format_and_print(outf, 2, header, NULL); - snprintf(header, sizeof(header), "cpu-%d", cpu); - format_and_print(outf, 3, header, NULL); + level = print_package_info(cpu, outf); snprintf(header, sizeof(header), "get-assoc"); - format_and_print(outf, 4, header, NULL); + format_and_print(outf, level + 1, header, NULL); snprintf(header, sizeof(header), "clos"); snprintf(value, sizeof(value), "%d", clos); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); - format_and_print(outf, 1, NULL, NULL); + format_and_print(outf, level, NULL, NULL); } void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd, @@ -697,26 +696,21 @@ void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd, { char header[256]; char value[256]; + int level = 3; + + if (cpu >= 0) + level = print_package_info(cpu, outf); - if (cpu >= 0) { - snprintf(header, sizeof(header), "package-%d", - get_physical_package_id(cpu)); - format_and_print(outf, 1, header, NULL); - snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu)); - format_and_print(outf, 2, header, NULL); - snprintf(header, sizeof(header), "cpu-%d", cpu); - format_and_print(outf, 3, header, NULL); - } snprintf(header, sizeof(header), "%s", feature); - format_and_print(outf, 4, header, NULL); + format_and_print(outf, level + 1, header, NULL); snprintf(header, sizeof(header), "%s", cmd); if (!result) snprintf(value, sizeof(value), "success"); else snprintf(value, sizeof(value), "failed(error %d)", result); - format_and_print(outf, 5, header, value); + format_and_print(outf, level + 2, header, value); - format_and_print(outf, 1, NULL, NULL); + format_and_print(outf, level, NULL, NULL); } void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg) -- cgit v1.2.3 From 1e46d1d59a6ca950a4ddccfea4c7e33455e10b27 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Mar 2020 14:45:38 -0800 Subject: tools/power/x86/intel-speed-select: Update version Fair number of changes including bug fixes done to change version. Signed-off-by: Srinivas Pandruvada Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 924cb871d6d2..cd803a40ee40 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.2"; +static const char *version_str = "v1.3"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; -- cgit v1.2.3 From 9945a2479893682bee8ae46e2ce5f180d4f6b1b2 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 9 Mar 2020 17:54:44 +0900 Subject: tools/power/x86/intel-speed-select: Fix a typo in error message This patch fix a spelling typo in error message. Signed-off-by: Masanari Iida Signed-off-by: Andy Shevchenko --- tools/power/x86/intel-speed-select/isst-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index cd803a40ee40..b73763489410 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -831,7 +831,7 @@ int isst_send_msr_command(unsigned int cpu, unsigned int msr, int write, msr_cmds.msr_cmd[0].data = *req_resp; if (ioctl(fd, ISST_IF_MSR_COMMAND, &msr_cmds) == -1) { - perror("ISST_IF_MSR_COMMAD"); + perror("ISST_IF_MSR_COMMAND"); fprintf(outf, "Error: msr_cmd cpu:%d msr:%x read_write:%d\n", cpu, msr, write); } else { -- cgit v1.2.3 From 27d05ed31acc82052e7b8942e0886b166ba67541 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Jan 2020 17:35:50 +0100 Subject: selftests: firmware: Add firmware_request_platform tests Add tests cases for checking the new firmware_request_platform api. Signed-off-by: Hans de Goede Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20200115163554.101315-7-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_filesystem.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 56894477c8bd..fcc281373b4d 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -86,6 +86,29 @@ else fi fi +# Try platform (EFI embedded fw) loading too +if [ ! -e "$DIR"/trigger_request_platform ]; then + echo "$0: firmware loading: platform trigger not present, ignoring test" >&2 +else + if printf '\000' >"$DIR"/trigger_request_platform 2> /dev/null; then + echo "$0: empty filename should not succeed (platform)" >&2 + exit 1 + fi + + # Note we echo a non-existing name, since files on the file-system + # are preferred over firmware embedded inside the platform's firmware + # The test adds a fake entry with the requested name to the platform's + # fw list, so the name does not matter as long as it does not exist + if ! echo -n "nope-$NAME" >"$DIR"/trigger_request_platform ; then + echo "$0: could not trigger request platform" >&2 + exit 1 + fi + + # The test verifies itself that the loaded firmware contents matches + # the contents for the fake platform fw entry it added. + echo "$0: platform loading works" +fi + ### Batched requests tests test_config_present() { -- cgit v1.2.3 From ca7e6e45d108f5712aec4407e6c757af59d43d0f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Mar 2020 10:16:37 -0700 Subject: bpftool: Print the enum's name instead of value This patch prints the enum's name if there is one found in the array of btf_enum. The commit 9eea98497951 ("bpf: fix BTF verification of enums") has details about an enum could have any power-of-2 size (up to 8 bytes). This patch also takes this chance to accommodate these non 4 byte enums. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200318171637.128862-1-kafai@fb.com --- tools/bpf/bpftool/btf_dumper.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 01cc52b834fa..e86e240da4df 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -43,9 +43,42 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, return btf_dumper_do_type(d, actual_type_id, bit_offset, data); } -static void btf_dumper_enum(const void *data, json_writer_t *jw) +static int btf_dumper_enum(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) { - jsonw_printf(jw, "%d", *(int *)data); + const struct btf_enum *enums = btf_enum(t); + __s64 value; + __u16 i; + + switch (t->size) { + case 8: + value = *(__s64 *)data; + break; + case 4: + value = *(__s32 *)data; + break; + case 2: + value = *(__s16 *)data; + break; + case 1: + value = *(__s8 *)data; + break; + default: + return -EINVAL; + } + + for (i = 0; i < btf_vlen(t); i++) { + if (value == enums[i].val) { + jsonw_string(d->jw, + btf__name_by_offset(d->btf, + enums[i].name_off)); + return 0; + } + } + + jsonw_int(d->jw, value); + return 0; } static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, @@ -366,8 +399,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_ARRAY: return btf_dumper_array(d, type_id, data); case BTF_KIND_ENUM: - btf_dumper_enum(data, d->jw); - return 0; + return btf_dumper_enum(d, t, data); case BTF_KIND_PTR: btf_dumper_ptr(data, d->jw, d->is_plain_text); return 0; -- cgit v1.2.3 From 30255d317579e93ca3ecf7f4ab953094123e4a8c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Mar 2020 10:16:43 -0700 Subject: bpftool: Print as a string for char array A char[] is currently printed as an integer array. This patch will print it as a string when 1) The array element type is an one byte int 2) The array element type has a BTF_INT_CHAR encoding or the array element type's name is "char" 3) All characters is between (0x1f, 0x7f) and it is terminated by a null character. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200318171643.129021-1-kafai@fb.com --- tools/bpf/bpftool/btf_dumper.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index e86e240da4df..e0cff4de2101 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -81,6 +81,42 @@ static int btf_dumper_enum(const struct btf_dumper *d, return 0; } +static bool is_str_array(const struct btf *btf, const struct btf_array *arr, + const char *s) +{ + const struct btf_type *elem_type; + const char *end_s; + + if (!arr->nelems) + return false; + + elem_type = btf__type_by_id(btf, arr->type); + /* Not skipping typedef. typedef to char does not count as + * a string now. + */ + while (elem_type && btf_is_mod(elem_type)) + elem_type = btf__type_by_id(btf, elem_type->type); + + if (!elem_type || !btf_is_int(elem_type) || elem_type->size != 1) + return false; + + if (btf_int_encoding(elem_type) != BTF_INT_CHAR && + strcmp("char", btf__name_by_offset(btf, elem_type->name_off))) + return false; + + end_s = s + arr->nelems; + while (s < end_s) { + if (!*s) + return true; + if (*s <= 0x1f || *s >= 0x7f) + return false; + s++; + } + + /* '\0' is not found */ + return false; +} + static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, const void *data) { @@ -90,6 +126,11 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, int ret = 0; __u32 i; + if (is_str_array(d->btf, arr, data)) { + jsonw_string(d->jw, data); + return 0; + } + elem_size = btf__resolve_size(d->btf, arr->type); if (elem_size < 0) return elem_size; -- cgit v1.2.3 From d5ae04da34a213cc518558ecd25af0f609428560 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Mar 2020 10:16:50 -0700 Subject: bpftool: Translate prog_id to its bpf prog_name The kernel struct_ops obj has kernel's func ptrs implemented by bpf_progs. The bpf prog_id is stored as the value of the func ptr for introspection purpose. In the latter patch, a struct_ops dump subcmd will be added to introspect these func ptrs. It is desired to print the actual bpf prog_name instead of only printing the prog_id. Since struct_ops is the only usecase storing prog_id in the func ptr, this patch adds a prog_id_as_func_ptr bool (default is false) to "struct btf_dumper" in order not to mis-interpret the ptr value for the other existing use-cases. While printing a func_ptr as a bpf prog_name, this patch also prefix the bpf prog_name with the ptr's func_proto. [ Note that it is the ptr's func_proto instead of the bpf prog's func_proto ] It reuses the current btf_dump_func() to obtain the ptr's func_proto string. Here is an example from the bpf_cubic.c: "void (struct sock *, u32, u32) bictcp_cong_avoid/prog_id:140" Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200318171650.129252-1-kafai@fb.com --- tools/bpf/bpftool/btf_dumper.c | 118 ++++++++++++++++++++++++++++++++++++----- tools/bpf/bpftool/main.h | 1 + 2 files changed, 107 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index e0cff4de2101..497807bec675 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -4,11 +4,13 @@ #include #include /* for (FILE *) used by json_writer */ #include +#include #include #include #include #include #include +#include #include "json_writer.h" #include "main.h" @@ -22,13 +24,102 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, __u8 bit_offset, const void *data); -static void btf_dumper_ptr(const void *data, json_writer_t *jw, - bool is_plain_text) +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size); + +static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, + const struct btf_type *func_proto, + __u32 prog_id) { - if (is_plain_text) - jsonw_printf(jw, "%p", *(void **)data); + struct bpf_prog_info_linear *prog_info = NULL; + const struct btf_type *func_type; + const char *prog_name = NULL; + struct bpf_func_info *finfo; + struct btf *prog_btf = NULL; + struct bpf_prog_info *info; + int prog_fd, func_sig_len; + char prog_str[1024]; + + /* Get the ptr's func_proto */ + func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, + sizeof(prog_str)); + if (func_sig_len == -1) + return -1; + + if (!prog_id) + goto print; + + /* Get the bpf_prog's name. Obtain from func_info. */ + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd == -1) + goto print; + + prog_info = bpf_program__get_prog_info_linear(prog_fd, + 1UL << BPF_PROG_INFO_FUNC_INFO); + close(prog_fd); + if (IS_ERR(prog_info)) { + prog_info = NULL; + goto print; + } + info = &prog_info->info; + + if (!info->btf_id || !info->nr_func_info || + btf__get_from_id(info->btf_id, &prog_btf)) + goto print; + finfo = (struct bpf_func_info *)info->func_info; + func_type = btf__type_by_id(prog_btf, finfo->type_id); + if (!func_type || !btf_is_func(func_type)) + goto print; + + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); + +print: + if (!prog_id) + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, " 0"); + else if (prog_name) + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, + " %s/prog_id:%u", prog_name, prog_id); else - jsonw_printf(jw, "%lu", *(unsigned long *)data); + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, + " /prog_id:%u", prog_id); + + prog_str[sizeof(prog_str) - 1] = '\0'; + jsonw_string(d->jw, prog_str); + btf__free(prog_btf); + free(prog_info); + return 0; +} + +static void btf_dumper_ptr(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) +{ + unsigned long value = *(unsigned long *)data; + const struct btf_type *ptr_type; + __s32 ptr_type_id; + + if (!d->prog_id_as_func_ptr || value > UINT32_MAX) + goto print_ptr_value; + + ptr_type_id = btf__resolve_type(d->btf, t->type); + if (ptr_type_id < 0) + goto print_ptr_value; + ptr_type = btf__type_by_id(d->btf, ptr_type_id); + if (!ptr_type || !btf_is_func_proto(ptr_type)) + goto print_ptr_value; + + if (!dump_prog_id_as_func_ptr(d, ptr_type, value)) + return; + +print_ptr_value: + if (d->is_plain_text) + jsonw_printf(d->jw, "%p", (void *)value); + else + jsonw_printf(d->jw, "%lu", value); } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, @@ -442,7 +533,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_ENUM: return btf_dumper_enum(d, t, data); case BTF_KIND_PTR: - btf_dumper_ptr(data, d->jw, d->is_plain_text); + btf_dumper_ptr(d, t, data); return 0; case BTF_KIND_UNKN: jsonw_printf(d->jw, "(unknown)"); @@ -487,10 +578,6 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, return -1; \ } while (0) -static int btf_dump_func(const struct btf *btf, char *func_sig, - const struct btf_type *func_proto, - const struct btf_type *func, int pos, int size); - static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, int pos, int size) { @@ -599,8 +686,15 @@ static int btf_dump_func(const struct btf *btf, char *func_sig, BTF_PRINT_ARG(", "); if (arg->type) { BTF_PRINT_TYPE(arg->type); - BTF_PRINT_ARG("%s", - btf__name_by_offset(btf, arg->name_off)); + if (arg->name_off) + BTF_PRINT_ARG("%s", + btf__name_by_offset(btf, arg->name_off)); + else if (pos && func_sig[pos - 1] == ' ') + /* Remove unnecessary space for + * FUNC_PROTO that does not have + * arg->name_off + */ + func_sig[--pos] = '\0'; } else { BTF_PRINT_ARG("..."); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5f6dccd43622..6db2398ae7e9 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -205,6 +205,7 @@ struct btf_dumper { const struct btf *btf; json_writer_t *jw; bool is_plain_text; + bool prog_id_as_func_ptr; }; /* btf_dumper_type - print data along with type information -- cgit v1.2.3 From 65c93628599dff4cd7cfb70130d1f6a2203731ea Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Mar 2020 10:16:56 -0700 Subject: bpftool: Add struct_ops support This patch adds struct_ops support to the bpftool. To recap a bit on the recent bpf_struct_ops feature on the kernel side: It currently supports "struct tcp_congestion_ops" to be implemented in bpf. At a high level, bpf_struct_ops is struct_ops map populated with a number of bpf progs. bpf_struct_ops currently supports the "struct tcp_congestion_ops". However, the bpf_struct_ops design is generic enough that other kernel struct ops can be supported in the future. Although struct_ops is map+progs at a high lever, there are differences in details. For example, 1) After registering a struct_ops, the struct_ops is held by the kernel subsystem (e.g. tcp-cc). Thus, there is no need to pin a struct_ops map or its progs in order to keep them around. 2) To iterate all struct_ops in a system, it iterates all maps in type BPF_MAP_TYPE_STRUCT_OPS. BPF_MAP_TYPE_STRUCT_OPS is the current usual filter. In the future, it may need to filter by other struct_ops specific properties. e.g. filter by tcp_congestion_ops or other kernel subsystem ops in the future. 3) struct_ops requires the running kernel having BTF info. That allows more flexibility in handling other kernel structs. e.g. it can always dump the latest bpf_map_info. 4) Also, "struct_ops" command is not intended to repeat all features already provided by "map" or "prog". For example, if there really is a need to pin the struct_ops map, the user can use the "map" cmd to do that. While the first attempt was to reuse parts from map/prog.c, it ended up not a lot to share. The only obvious item is the map_parse_fds() but that still requires modifications to accommodate struct_ops map specific filtering (for the immediate and the future needs). Together with the earlier mentioned differences, it is better to part away from map/prog.c. The initial set of subcmds are, register, unregister, show, and dump. For register, it registers all struct_ops maps that can be found in an obj file. Option can be added in the future to specify a particular struct_ops map. Also, the common bpf_tcp_cc is stateless (e.g. bpf_cubic.c and bpf_dctcp.c). The "reuse map" feature is not implemented in this patch and it can be considered later also. For other subcmds, please see the man doc for details. A sample output of dump: [root@arch-fb-vm1 bpf]# bpftool struct_ops dump name cubic [{ "bpf_map_info": { "type": 26, "id": 64, "key_size": 4, "value_size": 256, "max_entries": 1, "map_flags": 0, "name": "cubic", "ifindex": 0, "btf_vmlinux_value_type_id": 18452, "netns_dev": 0, "netns_ino": 0, "btf_id": 52, "btf_key_type_id": 0, "btf_value_type_id": 0 } },{ "bpf_struct_ops_tcp_congestion_ops": { "refcnt": { "refs": { "counter": 1 } }, "state": "BPF_STRUCT_OPS_STATE_INUSE", "data": { "list": { "next": 0, "prev": 0 }, "key": 0, "flags": 0, "init": "void (struct sock *) bictcp_init/prog_id:138", "release": "void (struct sock *) 0", "ssthresh": "u32 (struct sock *) bictcp_recalc_ssthresh/prog_id:141", "cong_avoid": "void (struct sock *, u32, u32) bictcp_cong_avoid/prog_id:140", "set_state": "void (struct sock *, u8) bictcp_state/prog_id:142", "cwnd_event": "void (struct sock *, enum tcp_ca_event) bictcp_cwnd_event/prog_id:139", "in_ack_event": "void (struct sock *, u32) 0", "undo_cwnd": "u32 (struct sock *) tcp_reno_undo_cwnd/prog_id:144", "pkts_acked": "void (struct sock *, const struct ack_sample *) bictcp_acked/prog_id:143", "min_tso_segs": "u32 (struct sock *) 0", "sndbuf_expand": "u32 (struct sock *) 0", "cong_control": "void (struct sock *, const struct rate_sample *) 0", "get_info": "size_t (struct sock *, u32, int *, union tcp_cc_info *) 0", "name": "bpf_cubic", "owner": 0 } } } ] Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200318171656.129650-1-kafai@fb.com --- .../bpftool/Documentation/bpftool-struct_ops.rst | 116 ++++ tools/bpf/bpftool/bash-completion/bpftool | 28 + tools/bpf/bpftool/main.c | 3 +- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/struct_ops.c | 596 +++++++++++++++++++++ 5 files changed, 743 insertions(+), 1 deletion(-) create mode 100644 tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst create mode 100644 tools/bpf/bpftool/struct_ops.c (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst new file mode 100644 index 000000000000..f045cc89dd6d --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -0,0 +1,116 @@ +================== +bpftool-struct_ops +================== +------------------------------------------------------------------------------- +tool to register/unregister/introspect BPF struct_ops +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** | **dump** | **register** | **unregister** | **help** } + +STRUCT_OPS COMMANDS +=================== + +| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops register** *OBJ* +| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP* +| **bpftool** **struct_ops help** +| +| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* } +| *OBJ* := /a/file/of/bpf_struct_ops.o + + +DESCRIPTION +=========== + **bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*] + Show brief information about the struct_ops in the system. + If *STRUCT_OPS_MAP* is specified, it shows information only + for the given struct_ops. Otherwise, it lists all struct_ops + currently existing in the system. + + Output will start with struct_ops map ID, followed by its map + name and its struct_ops's kernel type. + + **bpftool struct_ops dump** [*STRUCT_OPS_MAP*] + Dump details information about the struct_ops in the system. + If *STRUCT_OPS_MAP* is specified, it dumps information only + for the given struct_ops. Otherwise, it dumps all struct_ops + currently existing in the system. + + **bpftool struct_ops register** *OBJ* + Register bpf struct_ops from *OBJ*. All struct_ops under + the ELF section ".struct_ops" will be registered to + its kernel subsystem. + + **bpftool struct_ops unregister** *STRUCT_OPS_MAP* + Unregister the *STRUCT_OPS_MAP* from the kernel subsystem. + + **bpftool struct_ops help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -d, --debug + Print all logs available, even debug-level information. This + includes logs from libbpf as well as from the verifier, when + attempting to load programs. + +EXAMPLES +======== +**# bpftool struct_ops show** + +:: + + 100: dctcp tcp_congestion_ops + 105: cubic tcp_congestion_ops + +**# bpftool struct_ops unregister id 105** + +:: + + Unregistered tcp_congestion_ops cubic id 105 + +**# bpftool struct_ops register bpf_cubic.o** + +:: + + Registered tcp_congestion_ops cubic id 110 + + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) + **bpftool-gen**\ (8) + diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 9b0534f558f1..45ee99b159e2 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -576,6 +576,34 @@ _bpftool() ;; esac ;; + struct_ops) + local STRUCT_OPS_TYPE='id name' + case $command in + show|list|dump|unregister) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$STRUCT_OPS_TYPE" -- "$cur" ) ) + ;; + id) + _bpftool_get_map_ids_for_type struct_ops + ;; + name) + _bpftool_get_map_names_for_type struct_ops + ;; + esac + return 0 + ;; + register) + _filedir + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'register unregister show list dump help' \ + -- "$cur" ) ) + ;; + esac + ;; map) local MAP_TYPE='id pinned name' case $command in diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 06449e846e4b..466c269eabdd 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -58,7 +58,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -221,6 +221,7 @@ static const struct cmd cmds[] = { { "feature", do_feature }, { "btf", do_btf }, { "gen", do_gen }, + { "struct_ops", do_struct_ops }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 6db2398ae7e9..86f14ce26fd7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -161,6 +161,7 @@ int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); int do_btf(int argc, char **argv); int do_gen(int argc, char **argv); +int do_struct_ops(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c new file mode 100644 index 000000000000..2a7befbd11ad --- /dev/null +++ b/tools/bpf/bpftool/struct_ops.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook */ + +#include +#include +#include + +#include + +#include +#include +#include + +#include "json_writer.h" +#include "main.h" + +#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" + +static const struct btf_type *map_info_type; +static __u32 map_info_alloc_len; +static struct btf *btf_vmlinux; +static __s32 map_info_type_id; + +struct res { + unsigned int nr_maps; + unsigned int nr_errs; +}; + +static const struct btf *get_btf_vmlinux(void) +{ + if (btf_vmlinux) + return btf_vmlinux; + + btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(btf_vmlinux)) + p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); + + return btf_vmlinux; +} + +static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) +{ + const struct btf *kern_btf; + const struct btf_type *t; + const char *st_ops_name; + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) + return ""; + + t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); + st_ops_name = btf__name_by_offset(kern_btf, t->name_off); + st_ops_name += strlen(STRUCT_OPS_VALUE_PREFIX); + + return st_ops_name; +} + +static __s32 get_map_info_type_id(void) +{ + const struct btf *kern_btf; + + if (map_info_type_id) + return map_info_type_id; + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) { + map_info_type_id = PTR_ERR(kern_btf); + return map_info_type_id; + } + + map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info", + BTF_KIND_STRUCT); + if (map_info_type_id < 0) { + p_err("can't find bpf_map_info from btf_vmlinux"); + return map_info_type_id; + } + map_info_type = btf__type_by_id(kern_btf, map_info_type_id); + + /* Ensure map_info_alloc() has at least what the bpftool needs */ + map_info_alloc_len = map_info_type->size; + if (map_info_alloc_len < sizeof(struct bpf_map_info)) + map_info_alloc_len = sizeof(struct bpf_map_info); + + return map_info_type_id; +} + +/* If the subcmd needs to print out the bpf_map_info, + * it should always call map_info_alloc to allocate + * a bpf_map_info object instead of allocating it + * on the stack. + * + * map_info_alloc() will take the running kernel's btf + * into account. i.e. it will consider the + * sizeof(struct bpf_map_info) of the running kernel. + * + * It will enable the "struct_ops" cmd to print the latest + * "struct bpf_map_info". + * + * [ Recall that "struct_ops" requires the kernel's btf to + * be available ] + */ +static struct bpf_map_info *map_info_alloc(__u32 *alloc_len) +{ + struct bpf_map_info *info; + + if (get_map_info_type_id() < 0) + return NULL; + + info = calloc(1, map_info_alloc_len); + if (!info) + p_err("mem alloc failed"); + else + *alloc_len = map_info_alloc_len; + + return info; +} + +/* It iterates all struct_ops maps of the system. + * It returns the fd in "*res_fd" and map_info in "*info". + * In the very first iteration, info->id should be 0. + * An optional map "*name" filter can be specified. + * The filter can be made more flexible in the future. + * e.g. filter by kernel-struct-ops-name, regex-name, glob-name, ...etc. + * + * Return value: + * 1: A struct_ops map found. It is returned in "*res_fd" and "*info". + * The caller can continue to call get_next in the future. + * 0: No struct_ops map is returned. + * All struct_ops map has been found. + * -1: Error and the caller should abort the iteration. + */ +static int get_next_struct_ops_map(const char *name, int *res_fd, + struct bpf_map_info *info, __u32 info_len) +{ + __u32 id = info->id; + int err, fd; + + while (true) { + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + return 0; + p_err("can't get next map: %s", strerror(errno)); + return -1; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, info, &info_len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + close(fd); + return -1; + } + + if (info->type == BPF_MAP_TYPE_STRUCT_OPS && + (!name || !strcmp(name, info->name))) { + *res_fd = fd; + return 1; + } + close(fd); + } +} + +static int cmd_retval(const struct res *res, bool must_have_one_map) +{ + if (res->nr_errs || (!res->nr_maps && must_have_one_map)) + return -1; + + return 0; +} + +/* "data" is the work_func private storage */ +typedef int (*work_func)(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr); + +/* Find all struct_ops map in the system. + * Filter out by "name" (if specified). + * Then call "func(fd, info, data, wtr)" on each struct_ops map found. + */ +static struct res do_search(const char *name, work_func func, void *data, + struct json_writer *wtr) +{ + struct bpf_map_info *info; + struct res res = {}; + __u32 info_len; + int fd, err; + + info = map_info_alloc(&info_len); + if (!info) { + res.nr_errs++; + return res; + } + + if (wtr) + jsonw_start_array(wtr); + while ((err = get_next_struct_ops_map(name, &fd, info, info_len)) == 1) { + res.nr_maps++; + err = func(fd, info, data, wtr); + if (err) + res.nr_errs++; + close(fd); + } + if (wtr) + jsonw_end_array(wtr); + + if (err) + res.nr_errs++; + + if (!wtr && name && !res.nr_errs && !res.nr_maps) + /* It is not printing empty []. + * Thus, needs to specifically say nothing found + * for "name" here. + */ + p_err("no struct_ops found for %s", name); + else if (!wtr && json_output && !res.nr_errs) + /* The "func()" above is not writing any json (i.e. !wtr + * test here). + * + * However, "-j" is enabled and there is no errs here, + * so call json_null() as the current convention of + * other cmds. + */ + jsonw_null(json_wtr); + + free(info); + return res; +} + +static struct res do_one_id(const char *id_str, work_func func, void *data, + struct json_writer *wtr) +{ + struct bpf_map_info *info; + struct res res = {}; + unsigned long id; + __u32 info_len; + char *endptr; + int fd; + + id = strtoul(id_str, &endptr, 0); + if (*endptr || !id || id > UINT32_MAX) { + p_err("invalid id %s", id_str); + res.nr_errs++; + return res; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd == -1) { + p_err("can't get map by id (%lu): %s", id, strerror(errno)); + res.nr_errs++; + return res; + } + + info = map_info_alloc(&info_len); + if (!info) { + res.nr_errs++; + goto done; + } + + if (bpf_obj_get_info_by_fd(fd, info, &info_len)) { + p_err("can't get map info: %s", strerror(errno)); + res.nr_errs++; + goto done; + } + + if (info->type != BPF_MAP_TYPE_STRUCT_OPS) { + p_err("%s id %u is not a struct_ops map", info->name, info->id); + res.nr_errs++; + goto done; + } + + res.nr_maps++; + + if (func(fd, info, data, wtr)) + res.nr_errs++; + else if (!wtr && json_output) + /* The "func()" above is not writing any json (i.e. !wtr + * test here). + * + * However, "-j" is enabled and there is no errs here, + * so call json_null() as the current convention of + * other cmds. + */ + jsonw_null(json_wtr); + +done: + free(info); + close(fd); + + return res; +} + +static struct res do_work_on_struct_ops(const char *search_type, + const char *search_term, + work_func func, void *data, + struct json_writer *wtr) +{ + if (search_type) { + if (is_prefix(search_type, "id")) + return do_one_id(search_term, func, data, wtr); + else if (!is_prefix(search_type, "name")) + usage(); + } + + return do_search(search_term, func, data, wtr); +} + +static int __do_show(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + if (wtr) { + jsonw_start_object(wtr); + jsonw_uint_field(wtr, "id", info->id); + jsonw_string_field(wtr, "name", info->name); + jsonw_string_field(wtr, "kernel_struct_ops", + get_kern_struct_ops_name(info)); + jsonw_end_object(wtr); + } else { + printf("%u: %-15s %-32s\n", info->id, info->name, + get_kern_struct_ops_name(info)); + } + + return 0; +} + +static int do_show(int argc, char **argv) +{ + const char *search_type = NULL, *search_term = NULL; + struct res res; + + if (argc && argc != 2) + usage(); + + if (argc == 2) { + search_type = GET_ARG(); + search_term = GET_ARG(); + } + + res = do_work_on_struct_ops(search_type, search_term, __do_show, + NULL, json_wtr); + + return cmd_retval(&res, !!search_term); +} + +static int __do_dump(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + struct btf_dumper *d = (struct btf_dumper *)data; + const struct btf_type *struct_ops_type; + const struct btf *kern_btf = d->btf; + const char *struct_ops_name; + int zero = 0; + void *value; + + /* note: d->jw == wtr */ + + kern_btf = d->btf; + + /* The kernel supporting BPF_MAP_TYPE_STRUCT_OPS must have + * btf_vmlinux_value_type_id. + */ + struct_ops_type = btf__type_by_id(kern_btf, + info->btf_vmlinux_value_type_id); + struct_ops_name = btf__name_by_offset(kern_btf, + struct_ops_type->name_off); + value = calloc(1, info->value_size); + if (!value) { + p_err("mem alloc failed"); + return -1; + } + + if (bpf_map_lookup_elem(fd, &zero, value)) { + p_err("can't lookup struct_ops map %s id %u", + info->name, info->id); + free(value); + return -1; + } + + jsonw_start_object(wtr); + jsonw_name(wtr, "bpf_map_info"); + btf_dumper_type(d, map_info_type_id, (void *)info); + jsonw_end_object(wtr); + + jsonw_start_object(wtr); + jsonw_name(wtr, struct_ops_name); + btf_dumper_type(d, info->btf_vmlinux_value_type_id, value); + jsonw_end_object(wtr); + + free(value); + + return 0; +} + +static int do_dump(int argc, char **argv) +{ + const char *search_type = NULL, *search_term = NULL; + json_writer_t *wtr = json_wtr; + const struct btf *kern_btf; + struct btf_dumper d = {}; + struct res res; + + if (argc && argc != 2) + usage(); + + if (argc == 2) { + search_type = GET_ARG(); + search_term = GET_ARG(); + } + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) + return -1; + + if (!json_output) { + wtr = jsonw_new(stdout); + if (!wtr) { + p_err("can't create json writer"); + return -1; + } + jsonw_pretty(wtr, true); + } + + d.btf = kern_btf; + d.jw = wtr; + d.is_plain_text = !json_output; + d.prog_id_as_func_ptr = true; + + res = do_work_on_struct_ops(search_type, search_term, __do_dump, &d, + wtr); + + if (!json_output) + jsonw_destroy(&wtr); + + return cmd_retval(&res, !!search_term); +} + +static int __do_unregister(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + int zero = 0; + + if (bpf_map_delete_elem(fd, &zero)) { + p_err("can't unload %s %s id %u: %s", + get_kern_struct_ops_name(info), info->name, + info->id, strerror(errno)); + return -1; + } + + p_info("Unregistered %s %s id %u", + get_kern_struct_ops_name(info), info->name, + info->id); + + return 0; +} + +static int do_unregister(int argc, char **argv) +{ + const char *search_type, *search_term; + struct res res; + + if (argc != 2) + usage(); + + search_type = GET_ARG(); + search_term = GET_ARG(); + + res = do_work_on_struct_ops(search_type, search_term, + __do_unregister, NULL, NULL); + + return cmd_retval(&res, true); +} + +static int do_register(int argc, char **argv) +{ + const struct bpf_map_def *def; + struct bpf_map_info info = {}; + __u32 info_len = sizeof(info); + int nr_errs = 0, nr_maps = 0; + struct bpf_object *obj; + struct bpf_link *link; + struct bpf_map *map; + const char *file; + + if (argc != 1) + usage(); + + file = GET_ARG(); + + obj = bpf_object__open(file); + if (IS_ERR_OR_NULL(obj)) + return -1; + + set_max_rlimit(); + + if (bpf_object__load(obj)) { + bpf_object__close(obj); + return -1; + } + + bpf_object__for_each_map(map, obj) { + def = bpf_map__def(map); + if (def->type != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + link = bpf_map__attach_struct_ops(map); + if (IS_ERR(link)) { + p_err("can't register struct_ops %s: %s", + bpf_map__name(map), + strerror(-PTR_ERR(link))); + nr_errs++; + continue; + } + nr_maps++; + + bpf_link__disconnect(link); + bpf_link__destroy(link); + + if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, + &info_len)) + p_info("Registered %s %s id %u", + get_kern_struct_ops_name(&info), + bpf_map__name(map), + info.id); + else + /* Not p_err. The struct_ops was attached + * successfully. + */ + p_info("Registered %s but can't find id: %s", + bpf_map__name(map), strerror(errno)); + } + + bpf_object__close(obj); + + if (nr_errs) + return -1; + + if (!nr_maps) { + p_err("no struct_ops found in %s", file); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n" + " %s %s dump [STRUCT_OPS_MAP]\n" + " %s %s register OBJ\n" + " %s %s unregister STRUCT_OPS_MAP\n" + " %s %s help\n" + "\n" + " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n" + " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "register", do_register }, + { "unregister", do_unregister }, + { "dump", do_dump }, + { "help", do_help }, + { 0 } +}; + +int do_struct_ops(int argc, char **argv) +{ + int err; + + err = cmd_select(cmds, argc, argv, do_help); + + btf__free(btf_vmlinux); + return err; +} -- cgit v1.2.3 From 1440e7929392297f1578a54f372e9821bb1b0658 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 20 Mar 2020 13:15:10 -0700 Subject: selftests/bpf: Fix mix of tabs and spaces Clang's -Wmisleading-indentation warns about misleading indentations if there's a mixture of spaces and tabs. Remove extraneous spaces. Signed-off-by: Bill Wendling Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200320201510.217169-1-morbo@google.com --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 7390d3061065..cb33a7ee4e04 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -125,6 +125,6 @@ void test_btf_dump() { if (!test__start_subtest(t->name)) continue; - test_btf_dump_case(i, &btf_dump_test_cases[i]); + test_btf_dump_case(i, &btf_dump_test_cases[i]); } } -- cgit v1.2.3 From afc63da64f1e5e41875c98707020e85050f8a0c5 Mon Sep 17 00:00:00 2001 From: Heidi Fahim Date: Mon, 16 Mar 2020 13:21:24 -0700 Subject: kunit: kunit_parser: make parser more robust Previously, kunit_parser did not properly handle kunit TAP output that - had any prefixes (generated from different configs e.g. CONFIG_PRINTK_TIME) - had unrelated kernel output mixed in the middle of it, which has shown up when testing with allyesconfig To remove prefixes, the parser looks for the first line that includes TAP output, "TAP version 14". It then determines the length of the string before this sequence, and strips that number of characters off the beginning of the following lines until the last KUnit output line is reached. These fixes have been tested with additional tests in the KUnitParseTest and their associated logs have also been added. Signed-off-by: Heidi Fahim Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/.gitattributes | 1 + tools/testing/kunit/kunit_parser.py | 40 ++++++------- tools/testing/kunit/kunit_tool_test.py | 69 ++++++++++++++++++++++ .../kunit/test_data/test_config_printk_time.log | 31 ++++++++++ .../test_data/test_interrupted_tap_output.log | 37 ++++++++++++ .../test_data/test_kernel_panic_interrupt.log | 25 ++++++++ .../kunit/test_data/test_multiple_prefixes.log | 31 ++++++++++ .../test_output_with_prefix_isolated_correctly.log | 33 +++++++++++ .../kunit/test_data/test_pound_no_prefix.log | 33 +++++++++++ tools/testing/kunit/test_data/test_pound_sign.log | 0 10 files changed, 280 insertions(+), 20 deletions(-) create mode 100644 tools/testing/kunit/.gitattributes create mode 100644 tools/testing/kunit/test_data/test_config_printk_time.log create mode 100644 tools/testing/kunit/test_data/test_interrupted_tap_output.log create mode 100644 tools/testing/kunit/test_data/test_kernel_panic_interrupt.log create mode 100644 tools/testing/kunit/test_data/test_multiple_prefixes.log create mode 100644 tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log create mode 100644 tools/testing/kunit/test_data/test_pound_no_prefix.log create mode 100644 tools/testing/kunit/test_data/test_pound_sign.log (limited to 'tools') diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes new file mode 100644 index 000000000000..5b7da1fc3b8f --- /dev/null +++ b/tools/testing/kunit/.gitattributes @@ -0,0 +1 @@ +test_data/* binary diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 4ffbae0f6732..adf86747b07f 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -46,19 +46,21 @@ class TestStatus(Enum): TEST_CRASHED = auto() NO_TESTS = auto() -kunit_start_re = re.compile(r'^TAP version [0-9]+$') -kunit_end_re = re.compile('List of all partitions:') +kunit_start_re = re.compile(r'TAP version [0-9]+$') +kunit_end_re = re.compile('(List of all partitions:|' + 'Kernel panic - not syncing: VFS:|reboot: System halted)') def isolate_kunit_output(kernel_output): started = False for line in kernel_output: - if kunit_start_re.match(line): + if kunit_start_re.search(line): + prefix_len = len(line.split('TAP version')[0]) started = True - yield line - elif kunit_end_re.match(line): + yield line[prefix_len:] if prefix_len > 0 else line + elif kunit_end_re.search(line): break elif started: - yield line + yield line[prefix_len:] if prefix_len > 0 else line def raw_output(kernel_output): for line in kernel_output: @@ -108,18 +110,16 @@ OK_NOT_OK_SUBTEST = re.compile(r'^\t(ok|not ok) [0-9]+ - (.*)$') OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$') -def parse_ok_not_ok_test_case(lines: List[str], - test_case: TestCase, - expecting_test_case: bool) -> bool: +def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: save_non_diagnositic(lines, test_case) if not lines: - if expecting_test_case: - test_case.status = TestStatus.TEST_CRASHED - return True - else: - return False + test_case.status = TestStatus.TEST_CRASHED + return True line = lines[0] match = OK_NOT_OK_SUBTEST.match(line) + while not match and lines: + line = lines.pop(0) + match = OK_NOT_OK_SUBTEST.match(line) if match: test_case.log.append(lines.pop(0)) test_case.name = match.group(2) @@ -150,12 +150,12 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: else: return False -def parse_test_case(lines: List[str], expecting_test_case: bool) -> TestCase: +def parse_test_case(lines: List[str]) -> TestCase: test_case = TestCase() save_non_diagnositic(lines, test_case) while parse_diagnostic(lines, test_case): pass - if parse_ok_not_ok_test_case(lines, test_case, expecting_test_case): + if parse_ok_not_ok_test_case(lines, test_case): return test_case else: return None @@ -234,11 +234,11 @@ def parse_test_suite(lines: List[str]) -> TestSuite: expected_test_case_num = parse_subtest_plan(lines) if not expected_test_case_num: return None - test_case = parse_test_case(lines, expected_test_case_num > 0) - expected_test_case_num -= 1 - while test_case: + while expected_test_case_num > 0: + test_case = parse_test_case(lines) + if not test_case: + break test_suite.cases.append(test_case) - test_case = parse_test_case(lines, expected_test_case_num > 0) expected_test_case_num -= 1 if parse_ok_not_ok_test_suite(lines, test_suite): test_suite.status = bubble_up_test_case_errors(test_suite) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index cba97756ac4a..0efae697f396 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -108,6 +108,36 @@ class KUnitParserTest(unittest.TestCase): self.assertContains('ok 1 - example', result) file.close() + def test_output_with_prefix_isolated_correctly(self): + log_path = get_absolute_path( + 'test_data/test_pound_sign.log') + with open(log_path) as file: + result = kunit_parser.isolate_kunit_output(file.readlines()) + self.assertContains('TAP version 14\n', result) + self.assertContains(' # Subtest: kunit-resource-test', result) + self.assertContains(' 1..5', result) + self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) + self.assertContains(' ok 2 - kunit_resource_test_alloc_resource', result) + self.assertContains(' ok 3 - kunit_resource_test_destroy_resource', result) + self.assertContains(' foo bar #', result) + self.assertContains(' ok 4 - kunit_resource_test_cleanup_resources', result) + self.assertContains(' ok 5 - kunit_resource_test_proper_free_ordering', result) + self.assertContains('ok 1 - kunit-resource-test', result) + self.assertContains(' foo bar # non-kunit output', result) + self.assertContains(' # Subtest: kunit-try-catch-test', result) + self.assertContains(' 1..2', result) + self.assertContains(' ok 1 - kunit_test_try_catch_successful_try_no_catch', + result) + self.assertContains(' ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch', + result) + self.assertContains('ok 2 - kunit-try-catch-test', result) + self.assertContains(' # Subtest: string-stream-test', result) + self.assertContains(' 1..3', result) + self.assertContains(' ok 1 - string_stream_test_empty_on_creation', result) + self.assertContains(' ok 2 - string_stream_test_not_empty_after_add', result) + self.assertContains(' ok 3 - string_stream_test_get_string', result) + self.assertContains('ok 3 - string-stream-test', result) + def test_parse_successful_test_log(self): all_passed_log = get_absolute_path( 'test_data/test_is_test_passed-all_passed.log') @@ -150,6 +180,45 @@ class KUnitParserTest(unittest.TestCase): result.status) file.close() + def test_ignores_prefix_printk_time(self): + prefix_log = get_absolute_path( + 'test_data/test_config_printk_time.log') + with open(prefix_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + + def test_ignores_multiple_prefixes(self): + prefix_log = get_absolute_path( + 'test_data/test_multiple_prefixes.log') + with open(prefix_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + + def test_prefix_mixed_kernel_output(self): + mixed_prefix_log = get_absolute_path( + 'test_data/test_interrupted_tap_output.log') + with open(mixed_prefix_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + + def test_prefix_poundsign(self): + pound_log = get_absolute_path('test_data/test_pound_sign.log') + with open(pound_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + + def test_kernel_panic_end(self): + panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') + with open(panic_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + + def test_pound_no_prefix(self): + pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') + with open(pound_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual('kunit-resource-test', result.suites[0].name) + class StrContains(str): def __eq__(self, other): return self in other diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log new file mode 100644 index 000000000000..c02ca773946d --- /dev/null +++ b/tools/testing/kunit/test_data/test_config_printk_time.log @@ -0,0 +1,31 @@ +[ 0.060000] printk: console [mc-1] enabled +[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000] TAP version 14 +[ 0.060000] # Subtest: kunit-resource-test +[ 0.060000] 1..5 +[ 0.060000] ok 1 - kunit_resource_test_init_resources +[ 0.060000] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000] ok 1 - kunit-resource-test +[ 0.060000] # Subtest: kunit-try-catch-test +[ 0.060000] 1..2 +[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000] ok 2 - kunit-try-catch-test +[ 0.060000] # Subtest: string-stream-test +[ 0.060000] 1..3 +[ 0.060000] ok 1 - string_stream_test_empty_on_creation +[ 0.060000] ok 2 - string_stream_test_not_empty_after_add +[ 0.060000] ok 3 - string_stream_test_get_string +[ 0.060000] ok 3 - string-stream-test +[ 0.060000] List of all partitions: +[ 0.060000] No filesystem could mount root, tried: +[ 0.060000] +[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000] Stack: +[ 0.060000] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log new file mode 100644 index 000000000000..5c73fb3a1c6f --- /dev/null +++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log @@ -0,0 +1,37 @@ +[ 0.060000] printk: console [mc-1] enabled +[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000] TAP version 14 +[ 0.060000] # Subtest: kunit-resource-test +[ 0.060000] 1..5 +[ 0.060000] ok 1 - kunit_resource_test_init_resources +[ 0.060000] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000] kAFS: Red Hat AFS client v0.1 registering. +[ 0.060000] FS-Cache: Netfs 'afs' registered for caching +[ 0.060000] *** VALIDATE kAFS *** +[ 0.060000] Btrfs loaded, crc32c=crc32c-generic, debug=on, assert=on, integrity-checker=on, ref-verify=on +[ 0.060000] BTRFS: selftest: sectorsize: 4096 nodesize: 4096 +[ 0.060000] BTRFS: selftest: running btrfs free space cache tests +[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000] ok 1 - kunit-resource-test +[ 0.060000] # Subtest: kunit-try-catch-test +[ 0.060000] 1..2 +[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000] ok 2 - kunit-try-catch-test +[ 0.060000] # Subtest: string-stream-test +[ 0.060000] 1..3 +[ 0.060000] ok 1 - string_stream_test_empty_on_creation +[ 0.060000] ok 2 - string_stream_test_not_empty_after_add +[ 0.060000] ok 3 - string_stream_test_get_string +[ 0.060000] ok 3 - string-stream-test +[ 0.060000] List of all partitions: +[ 0.060000] No filesystem could mount root, tried: +[ 0.060000] +[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000] Stack: +[ 0.060000] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log new file mode 100644 index 000000000000..c045eee75f27 --- /dev/null +++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log @@ -0,0 +1,25 @@ +[ 0.060000] printk: console [mc-1] enabled +[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000] TAP version 14 +[ 0.060000] # Subtest: kunit-resource-test +[ 0.060000] 1..5 +[ 0.060000] ok 1 - kunit_resource_test_init_resources +[ 0.060000] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000] ok 1 - kunit-resource-test +[ 0.060000] # Subtest: kunit-try-catch-test +[ 0.060000] 1..2 +[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000] ok 2 - kunit-try-catch-test +[ 0.060000] # Subtest: string-stream-test +[ 0.060000] 1..3 +[ 0.060000] ok 1 - string_stream_test_empty_on_creation +[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000] Stack: +[ 0.060000] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log new file mode 100644 index 000000000000..bc48407dcc36 --- /dev/null +++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log @@ -0,0 +1,31 @@ +[ 0.060000][ T1] printk: console [mc-1] enabled +[ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000][ T1] TAP version 14 +[ 0.060000][ T1] # Subtest: kunit-resource-test +[ 0.060000][ T1] 1..5 +[ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources +[ 0.060000][ T1] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000][ T1] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000][ T1] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000][ T1] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000][ T1] ok 1 - kunit-resource-test +[ 0.060000][ T1] # Subtest: kunit-try-catch-test +[ 0.060000][ T1] 1..2 +[ 0.060000][ T1] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000][ T1] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000][ T1] ok 2 - kunit-try-catch-test +[ 0.060000][ T1] # Subtest: string-stream-test +[ 0.060000][ T1] 1..3 +[ 0.060000][ T1] ok 1 - string_stream_test_empty_on_creation +[ 0.060000][ T1] ok 2 - string_stream_test_not_empty_after_add +[ 0.060000][ T1] ok 3 - string_stream_test_get_string +[ 0.060000][ T1] ok 3 - string-stream-test +[ 0.060000][ T1] List of all partitions: +[ 0.060000][ T1] No filesystem could mount root, tried: +[ 0.060000][ T1] +[ 0.060000][ T1] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000][ T1] Stack: +[ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log new file mode 100644 index 000000000000..0f87cdabebb0 --- /dev/null +++ b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log @@ -0,0 +1,33 @@ +[ 0.060000] printk: console [mc-1] enabled +[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000] TAP version 14 +[ 0.060000] # Subtest: kunit-resource-test +[ 0.060000] 1..5 +[ 0.060000] ok 1 - kunit_resource_test_init_resources +[ 0.060000] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000] foo bar # +[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000] ok 1 - kunit-resource-test +[ 0.060000] foo bar # non-kunit output +[ 0.060000] # Subtest: kunit-try-catch-test +[ 0.060000] 1..2 +[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000] ok 2 - kunit-try-catch-test +[ 0.060000] # Subtest: string-stream-test +[ 0.060000] 1..3 +[ 0.060000] ok 1 - string_stream_test_empty_on_creation +[ 0.060000] ok 2 - string_stream_test_not_empty_after_add +[ 0.060000] ok 3 - string_stream_test_get_string +[ 0.060000] ok 3 - string-stream-test +[ 0.060000] List of all partitions: +[ 0.060000] No filesystem could mount root, tried: +[ 0.060000] +[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000] Stack: +[ 0.060000] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log new file mode 100644 index 000000000000..2ceb360be7d5 --- /dev/null +++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log @@ -0,0 +1,33 @@ + printk: console [mc-1] enabled + random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 + TAP version 14 + # Subtest: kunit-resource-test + 1..5 + ok 1 - kunit_resource_test_init_resources + ok 2 - kunit_resource_test_alloc_resource + ok 3 - kunit_resource_test_destroy_resource + foo bar # + ok 4 - kunit_resource_test_cleanup_resources + ok 5 - kunit_resource_test_proper_free_ordering + ok 1 - kunit-resource-test + foo bar # non-kunit output + # Subtest: kunit-try-catch-test + 1..2 + ok 1 - kunit_test_try_catch_successful_try_no_catch + ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch + ok 2 - kunit-try-catch-test + # Subtest: string-stream-test + 1..3 + ok 1 - string_stream_test_empty_on_creation + ok 2 - string_stream_test_not_empty_after_add + ok 3 - string_stream_test_get_string + ok 3 - string-stream-test + List of all partitions: + No filesystem could mount root, tried: + + Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) + CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 + Stack: + 602086f8 601bc260 705c0000 705c0000 + 602086f8 6005fcec 705c0000 6002c6ab + 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log new file mode 100644 index 000000000000..e69de29bb2d1 -- cgit v1.2.3 From 021ed9f551da33449a5238e45e849913422671d7 Mon Sep 17 00:00:00 2001 From: Heidi Fahim Date: Mon, 16 Mar 2020 13:21:25 -0700 Subject: kunit: Run all KUnit tests through allyesconfig Implemented the functionality to run all KUnit tests through kunit_tool by specifying an --alltests flag, which builds UML with allyesconfig enabled, and consequently runs every KUnit test. A new function was added to kunit_kernel: make_allyesconfig. Firstly, if --alltests is specified, kunit.py triggers build_um_kernel which call make_allyesconfig. This function calls the make command, disables the broken configs that would otherwise prevent UML from building, then starts the kernel with all possible configurations enabled. All stdout and stderr is sent to test.log and read from there then fed through kunit_parser to parse the tests to the user. Also added a signal_handler in case kunit is interrupted while running. Tested: Run under different conditions such as testing with --raw_output, testing program interrupt then immediately running kunit again without --alltests and making sure to clean the console. Signed-off-by: Heidi Fahim Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/configs/broken_on_uml.config | 41 ++++++++++++++++ tools/testing/kunit/kunit.py | 30 +++++++----- tools/testing/kunit/kunit_kernel.py | 62 +++++++++++++++++------- tools/testing/kunit/kunit_parser.py | 1 + tools/testing/kunit/kunit_tool_test.py | 17 ++++--- 5 files changed, 115 insertions(+), 36 deletions(-) create mode 100644 tools/testing/kunit/configs/broken_on_uml.config (limited to 'tools') diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config new file mode 100644 index 000000000000..239b9f03da2c --- /dev/null +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -0,0 +1,41 @@ +# These are currently broken on UML and prevent allyesconfig from building +# CONFIG_STATIC_LINK is not set +# CONFIG_UML_NET_VECTOR is not set +# CONFIG_UML_NET_VDE is not set +# CONFIG_UML_NET_PCAP is not set +# CONFIG_NET_PTP_CLASSIFY is not set +# CONFIG_IP_VS is not set +# CONFIG_BRIDGE_EBT_BROUTE is not set +# CONFIG_BRIDGE_EBT_T_FILTER is not set +# CONFIG_BRIDGE_EBT_T_NAT is not set +# CONFIG_MTD_NAND_CADENCE is not set +# CONFIG_MTD_NAND_NANDSIM is not set +# CONFIG_BLK_DEV_NULL_BLK is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_NET_VENDOR_XILINX is not set +# CONFIG_NULL_TTY is not set +# CONFIG_PTP_1588_CLOCK is not set +# CONFIG_PINCTRL_EQUILIBRIUM is not set +# CONFIG_DMABUF_SELFTESTS is not set +# CONFIG_COMEDI is not set +# CONFIG_XIL_AXIS_FIFO is not set +# CONFIG_EXFAT_FS is not set +# CONFIG_STM_DUMMY is not set +# CONFIG_FSI_MASTER_ASPEED is not set +# CONFIG_JFS_FS is not set +# CONFIG_UBIFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_CRYPTO_DEV_SAFEXCEL is not set +# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set +# CONFIG_KCOV is not set +# CONFIG_LKDTM is not set +# CONFIG_REED_SOLOMON_TEST is not set +# CONFIG_TEST_RHASHTABLE is not set +# CONFIG_TEST_MEMINIT is not set +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +# CONFIG_DEBUG_INFO_BTF is not set +# CONFIG_PTP_1588_CLOCK_INES is not set +# CONFIG_QCOM_CPR is not set +# CONFIG_RESET_BRCMSTB_RESCAL is not set +# CONFIG_RESET_INTEL_GW is not set diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 180ad1e1b04f..650bb4cfc544 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -22,7 +22,9 @@ import kunit_parser KunitResult = namedtuple('KunitResult', ['status','result']) -KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig']) +KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', + 'build_dir', 'defconfig', + 'alltests']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -55,24 +57,23 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, kunit_parser.print_with_timestamp('Building KUnit Kernel ...') build_start = time.time() - success = linux.build_um_kernel(request.jobs, request.build_dir) + success = linux.build_um_kernel(request.alltests, + request.jobs, + request.build_dir) build_end = time.time() if not success: return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel') kunit_parser.print_with_timestamp('Starting KUnit Kernel ...') test_start = time.time() - - test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, - [], - 'Tests not Parsed.') + kunit_output = linux.run_kernel( + timeout=None if request.alltests else request.timeout, + build_dir=request.build_dir) if request.raw_output: - kunit_parser.raw_output( - linux.run_kernel(timeout=request.timeout, - build_dir=request.build_dir)) + raw_output = kunit_parser.raw_output(kunit_output) + isolated = list(kunit_parser.isolate_kunit_output(raw_output)) + test_result = kunit_parser.parse_test_result(isolated) else: - kunit_output = linux.run_kernel(timeout=request.timeout, - build_dir=request.build_dir) test_result = kunit_parser.parse_run_tests(kunit_output) test_end = time.time() @@ -120,6 +121,10 @@ def main(argv, linux=None): help='Uses a default .kunitconfig.', action='store_true') + run_parser.add_argument('--alltests', + help='Run all KUnit tests through allyesconfig', + action='store_true') + cli_args = parser.parse_args(argv) if cli_args.subcommand == 'run': @@ -143,7 +148,8 @@ def main(argv, linux=None): cli_args.timeout, cli_args.jobs, cli_args.build_dir, - cli_args.defconfig) + cli_args.defconfig, + cli_args.alltests) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index d99ae75ef72f..415d8f3c4fe4 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -10,11 +10,16 @@ import logging import subprocess import os +import signal + +from contextlib import ExitStack import kunit_config +import kunit_parser KCONFIG_PATH = '.config' kunitconfig_path = '.kunitconfig' +BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -40,12 +45,29 @@ class LinuxSourceTreeOperations(object): if build_dir: command += ['O=' + build_dir] try: - subprocess.check_output(command) + subprocess.check_output(command, stderr=subprocess.PIPE) except OSError as e: raise ConfigError('Could not call make command: ' + e) except subprocess.CalledProcessError as e: raise ConfigError(e.output) + def make_allyesconfig(self): + kunit_parser.print_with_timestamp( + 'Enabling all CONFIGs for UML...') + process = subprocess.Popen( + ['make', 'ARCH=um', 'allyesconfig'], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT) + process.wait() + kunit_parser.print_with_timestamp( + 'Disabling broken configs to run KUnit tests...') + with ExitStack() as es: + config = open(KCONFIG_PATH, 'a') + disable = open(BROKEN_ALLCONFIG_PATH, 'r').read() + config.write(disable) + kunit_parser.print_with_timestamp( + 'Starting Kernel with all configs takes a few minutes...') + def make(self, jobs, build_dir): command = ['make', 'ARCH=um', '--jobs=' + str(jobs)] if build_dir: @@ -57,18 +79,16 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise BuildError(e.output) - def linux_bin(self, params, timeout, build_dir): + def linux_bin(self, params, timeout, build_dir, outfile): """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = './linux' if build_dir: linux_bin = os.path.join(build_dir, 'linux') - process = subprocess.Popen( - [linux_bin] + params, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - process.wait(timeout=timeout) - return process + with open(outfile, 'w') as output: + process = subprocess.Popen([linux_bin] + params, + stdout=output, + stderr=subprocess.STDOUT) + process.wait(timeout) def get_kconfig_path(build_dir): @@ -84,6 +104,7 @@ class LinuxSourceTree(object): self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) self._ops = LinuxSourceTreeOperations() + signal.signal(signal.SIGINT, self.signal_handler) def clean(self): try: @@ -135,7 +156,9 @@ class LinuxSourceTree(object): print('Generating .config ...') return self.build_config(build_dir) - def build_um_kernel(self, jobs, build_dir): + def build_um_kernel(self, alltests, jobs, build_dir): + if alltests: + self._ops.make_allyesconfig() try: self._ops.make_olddefconfig(build_dir) self._ops.make(jobs, build_dir) @@ -144,10 +167,15 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], timeout=None, build_dir=''): - args.extend(['mem=256M']) - process = self._ops.linux_bin(args, timeout, build_dir) - with open(os.path.join(build_dir, 'test.log'), 'w') as f: - for line in process.stdout: - f.write(line.rstrip().decode('ascii') + '\n') - yield line.rstrip().decode('ascii') + def run_kernel(self, args=[], build_dir='', timeout=None): + args.extend(['mem=1G']) + outfile = 'test.log' + self._ops.linux_bin(args, timeout, build_dir, outfile) + subprocess.call(['stty', 'sane']) + with open(outfile, 'r') as file: + for line in file: + yield line + + def signal_handler(self, sig, frame): + logging.error('Build interruption occurred. Cleaning console.') + subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index adf86747b07f..02815062d5a6 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -65,6 +65,7 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: print(line) + yield line DIVIDER = '=' * 60 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0efae697f396..ce47e87b633a 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -243,7 +243,8 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run'], self.linux_source_mock) assert self.linux_source_mock.build_reconfig.call_count == 1 assert self.linux_source_mock.run_kernel.call_count == 1 - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_fail(self): @@ -258,25 +259,27 @@ class KUnitMainTest(unittest.TestCase): def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) - kunit.main(['run', '--raw_output'], self.linux_source_mock) + with self.assertRaises(SystemExit) as e: + kunit.main(['run', '--raw_output'], self.linux_source_mock) + assert type(e.exception) == SystemExit + assert e.exception.code == 1 assert self.linux_source_mock.build_reconfig.call_count == 1 assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) def test_run_timeout(self): timeout = 3453 kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) assert self.linux_source_mock.build_reconfig.call_count == 1 - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_builddir(self): build_dir = '.kunit' kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock) assert self.linux_source_mock.build_reconfig.call_count == 1 - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir=build_dir, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) if __name__ == '__main__': -- cgit v1.2.3 From abdcbdb265264f736df316622a695ad30019c05f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Mar 2020 00:47:47 -0400 Subject: tools/power turbostat: Print cpuidle information Print cpuidle driver and governor. Originally-by: Antti Laakso Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 77f89371ec5f..05dbe23570d4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3503,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) dump_nhm_cst_cfg(); } +static void dump_sysfs_file(char *path) +{ + FILE *input; + char cpuidle_buf[64]; + + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return; + } + if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); +} static void dump_sysfs_cstate_config(void) { @@ -3516,6 +3533,15 @@ dump_sysfs_cstate_config(void) if (!DO_BIC(BIC_sysfs)) return; + if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { + fprintf(outf, "cpuidle not loaded\n"); + return; + } + + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); + for (state = 0; state < 10; ++state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", -- cgit v1.2.3 From b95fffb9b4afa8b9aa4a389ec7a0c578811eaf42 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 17 Nov 2019 21:18:31 -0500 Subject: tools/power turbostat: update version A stitch in time saves nine. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 05dbe23570d4..33b370865d16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5390,7 +5390,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 19.08.31" + fprintf(outf, "turbostat version 20.03.20" " - Len Brown \n"); } -- cgit v1.2.3 From 2502ec37a7b228b34c1e2e89480f98b92f53046a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 12:56:40 +0100 Subject: lockdep: Rename trace_hardirq_{enter,exit}() Continue what commit: d820ac4c2fa8 ("locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit]") started, rename these to avoid confusing them with tracepoints. Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200320115859.060481361@infradead.org --- include/linux/hardirq.h | 8 ++++---- include/linux/irqflags.h | 8 ++++---- kernel/softirq.c | 7 ++++--- tools/include/linux/irqflags.h | 4 ++-- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index da0af631ded5..7c8b82f69288 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -37,7 +37,7 @@ extern void rcu_nmi_exit(void); do { \ account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ - trace_hardirq_enter(); \ + lockdep_hardirq_enter(); \ } while (0) /* @@ -50,7 +50,7 @@ extern void irq_enter(void); */ #define __irq_exit() \ do { \ - trace_hardirq_exit(); \ + lockdep_hardirq_exit(); \ account_irq_exit_time(current); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) @@ -74,12 +74,12 @@ extern void irq_exit(void); BUG_ON(in_nmi()); \ preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ - trace_hardirq_enter(); \ + lockdep_hardirq_enter(); \ } while (0) #define nmi_exit() \ do { \ - trace_hardirq_exit(); \ + lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 21619c92c377..7c4e64589250 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -35,11 +35,11 @@ # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) -# define trace_hardirq_enter() \ +# define lockdep_hardirq_enter() \ do { \ current->hardirq_context++; \ } while (0) -# define trace_hardirq_exit() \ +# define lockdep_hardirq_exit() \ do { \ current->hardirq_context--; \ } while (0) @@ -58,8 +58,8 @@ do { \ # define trace_softirq_context(p) 0 # define trace_hardirqs_enabled(p) 0 # define trace_softirqs_enabled(p) 0 -# define trace_hardirq_enter() do { } while (0) -# define trace_hardirq_exit() do { } while (0) +# define lockdep_hardirq_enter() do { } while (0) +# define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 0427a86743a4..b3286892abac 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -226,7 +226,7 @@ static inline bool lockdep_softirq_start(void) if (trace_hardirq_context(current)) { in_hardirq = true; - trace_hardirq_exit(); + lockdep_hardirq_exit(); } lockdep_softirq_enter(); @@ -239,7 +239,7 @@ static inline void lockdep_softirq_end(bool in_hardirq) lockdep_softirq_exit(); if (in_hardirq) - trace_hardirq_enter(); + lockdep_hardirq_enter(); } #else static inline bool lockdep_softirq_start(void) { return false; } @@ -414,7 +414,8 @@ void irq_exit(void) tick_irq_exit(); rcu_irq_exit(); - trace_hardirq_exit(); /* must be last! */ + /* must be last! */ + lockdep_hardirq_exit(); } /* diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h index e734da3e5b33..ced6f64a7367 100644 --- a/tools/include/linux/irqflags.h +++ b/tools/include/linux/irqflags.h @@ -6,8 +6,8 @@ # define trace_softirq_context(p) 0 # define trace_hardirqs_enabled(p) 0 # define trace_softirqs_enabled(p) 0 -# define trace_hardirq_enter() do { } while (0) -# define trace_hardirq_exit() do { } while (0) +# define lockdep_hardirq_enter() do { } while (0) +# define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define INIT_TRACE_IRQFLAGS -- cgit v1.2.3 From ef996916e78e03d25e56c2d372e5e21fdb471882 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Mar 2020 12:56:42 +0100 Subject: lockdep: Rename trace_{hard,soft}{irq_context,irqs_enabled}() Continue what commit: d820ac4c2fa8 ("locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit]") started, rename these to avoid confusing them with tracepoints. git grep -l "trace_\(soft\|hard\)\(irq_context\|irqs_enabled\)" | while read file; do sed -ie 's/trace_\(soft\|hard\)\(irq_context\|irqs_enabled\)/lockdep_\1\2/g' $file; done Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200320115859.178626842@infradead.org --- include/linux/irqflags.h | 16 ++++++++-------- kernel/locking/lockdep.c | 8 ++++---- kernel/softirq.c | 2 +- tools/include/linux/irqflags.h | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 7ca1f2126ae1..f4c3907e241a 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -31,10 +31,10 @@ #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define trace_hardirq_context(p) ((p)->hardirq_context) -# define trace_softirq_context(p) ((p)->softirq_context) -# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) -# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) +# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_softirq_context(p) ((p)->softirq_context) +# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ current->hardirq_context++; \ @@ -54,10 +54,10 @@ do { \ #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define trace_hardirq_context(p) 0 -# define trace_softirq_context(p) 0 -# define trace_hardirqs_enabled(p) 0 -# define trace_softirqs_enabled(p) 0 +# define lockdep_hardirq_context(p) 0 +# define lockdep_softirq_context(p) 0 +# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 26ef41296fb3..4075e3ec460b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3081,10 +3081,10 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", curr->comm, task_pid_nr(curr), - trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, - trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - trace_hardirqs_enabled(curr), - trace_softirqs_enabled(curr)); + lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, + lockdep_hardirqs_enabled(curr), + lockdep_softirqs_enabled(curr)); print_lock(this); pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]); diff --git a/kernel/softirq.c b/kernel/softirq.c index 0112ca01694b..a47c6dd57452 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -224,7 +224,7 @@ static inline bool lockdep_softirq_start(void) { bool in_hardirq = false; - if (trace_hardirq_context(current)) { + if (lockdep_hardirq_context(current)) { in_hardirq = true; lockdep_hardirq_exit(); } diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h index ced6f64a7367..67e01bbadbfe 100644 --- a/tools/include/linux/irqflags.h +++ b/tools/include/linux/irqflags.h @@ -2,10 +2,10 @@ #ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ #define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -# define trace_hardirq_context(p) 0 -# define trace_softirq_context(p) 0 -# define trace_hardirqs_enabled(p) 0 -# define trace_softirqs_enabled(p) 0 +# define lockdep_hardirq_context(p) 0 +# define lockdep_softirq_context(p) 0 +# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) -- cgit v1.2.3 From 19882ecb55c5b6ca1e0408c8724d55a895e5d0bc Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Tue, 17 Mar 2020 12:25:05 -0700 Subject: selftests: txtimestamp: allow individual txtimestamp tests. The wrapper script txtimestamp.sh executes a pre-defined list of testcases sequentially without configuration options available. Add an option (-r/--run) to setup the test namespace and pass remaining arguments to txtimestamp binary. The script still runs all tests when no argument is passed. Signed-off-by: Jian Yang Acked-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.sh | 31 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/networking/timestamping/txtimestamp.sh index df0d86ca72b7..70a8cda7fd53 100755 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.sh @@ -43,15 +43,40 @@ run_test_tcpudpraw() { } run_test_all() { + setup run_test_tcpudpraw # setsockopt run_test_tcpudpraw -C # cmsg run_test_tcpudpraw -n # timestamp w/o data + echo "OK. All tests passed" +} + +run_test_one() { + setup + ./txtimestamp $@ +} + +usage() { + echo "Usage: $0 [ -r | --run ] | [ -h | --help ]" + echo " (no args) Run all tests" + echo " -r|--run Run an individual test with arguments" + echo " -h|--help Help" +} + +main() { + if [[ $# -eq 0 ]]; then + run_test_all + else + if [[ "$1" = "-r" || "$1" == "--run" ]]; then + shift + run_test_one $@ + else + usage + fi + fi } if [[ "$(ip netns identify)" == "root" ]]; then ../../net/in_netns.sh $0 $@ else - setup - run_test_all - echo "OK. All tests passed" + main $@ fi -- cgit v1.2.3 From 70a7ee96da3354a879041b20e3214d08f2a9d465 Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Tue, 17 Mar 2020 12:25:06 -0700 Subject: selftests: txtimestamp: allow printing latencies in nsec. Txtimestamp reports latencies in uses resolution, while nsec is needed in cases such as measuring latencies on localhost. Add the following new flag: -N: print timestamps and durations in nsec (instead of usec) Signed-off-by: Jian Yang Acked-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 56 +++++++++++++++++----- 1 file changed, 44 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 7e386be47120..a9b8c41a3009 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -49,6 +49,10 @@ #include #include +#define NSEC_PER_USEC 1000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000LL + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -67,6 +71,7 @@ static bool cfg_use_cmsg; static bool cfg_use_pf_packet; static bool cfg_do_listen; static uint16_t dest_port = 9000; +static bool cfg_print_nsec; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -77,9 +82,14 @@ static int saved_tskey_type = -1; static bool test_failed; +static int64_t timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + static int64_t timespec_to_us64(struct timespec *ts) { - return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; + return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC; } static void validate_key(int tskey, int tstype) @@ -113,25 +123,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay) start64 = timespec_to_us64(&ts_usr); if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { - fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); test_failed = true; } } +static void __print_ts_delta_formatted(int64_t ts_delta) +{ + if (cfg_print_nsec) + fprintf(stderr, "%lu ns", ts_delta); + else + fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC); +} + static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) { + int64_t ts_delta; + if (!(cur->tv_sec | cur->tv_nsec)) return; - fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", - name, cur->tv_sec, cur->tv_nsec / 1000, - key, payload_len); - - if (cur != &ts_usr) - fprintf(stderr, " (USR %+" PRId64 " us)", - timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); + if (cfg_print_nsec) + fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec, + key, payload_len); + else + fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC, + key, payload_len); + + if (cur != &ts_usr) { + ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr); + fprintf(stderr, " (USR +"); + __print_ts_delta_formatted(ts_delta); + fprintf(stderr, ")"); + } fprintf(stderr, "\n"); } @@ -526,7 +554,7 @@ static void do_test(int family, unsigned int report_opt) /* wait for all errors to be queued, else ACKs arrive OOO */ if (!cfg_no_delay) - usleep(50 * 1000); + usleep(50 * NSEC_PER_USEC); __poll(fd); @@ -537,7 +565,7 @@ static void do_test(int family, unsigned int report_opt) error(1, errno, "close"); free(buf); - usleep(100 * 1000); + usleep(100 * NSEC_PER_USEC); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -555,6 +583,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -l N: send N bytes at a time\n" " -L listen on hostname and port\n" " -n: set no-payload option\n" + " -N: print timestamps and durations in nsec (instead of usec)\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" @@ -572,7 +601,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:LnNp:PrRuv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -604,6 +633,9 @@ static void parse_opt(int argc, char **argv) case 'n': cfg_loop_nodata = true; break; + case 'N': + cfg_print_nsec = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; -- cgit v1.2.3 From 5090147c30232306fc9d13a384384cc763a7966d Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Tue, 17 Mar 2020 12:25:07 -0700 Subject: selftests: txtimestamp: add new command-line flags. A longer sleep duration between sendmsg()s makes more cachelines to be evicted and results in higher latency. Making the duration configurable. Add the following new flags: -S: Configurable sleep duration. -b: Busy loop instead of poll(). Remove the following flag: -D: No delay between packets: subsumed by -S. Signed-off-by: Jian Yang Acked-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 24 ++++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index a9b8c41a3009..ee060ae3d44f 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -65,8 +65,9 @@ static int cfg_delay_snd; static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_busy_poll; +static int cfg_sleep_usec = 50 * 1000; static bool cfg_loop_nodata; -static bool cfg_no_delay; static bool cfg_use_cmsg; static bool cfg_use_pf_packet; static bool cfg_do_listen; @@ -553,10 +554,11 @@ static void do_test(int family, unsigned int report_opt) error(1, errno, "send"); /* wait for all errors to be queued, else ACKs arrive OOO */ - if (!cfg_no_delay) - usleep(50 * NSEC_PER_USEC); + if (cfg_sleep_usec) + usleep(cfg_sleep_usec); - __poll(fd); + if (!cfg_busy_poll) + __poll(fd); while (!recv_errmsg(fd)) {} } @@ -575,9 +577,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -b: busy poll to read from error queue\n" " -c N: number of packets for each test\n" " -C: use cmsg to set tstamp recording options\n" - " -D: no delay between packets\n" " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" @@ -588,6 +590,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" + " -S N: usec to sleep before reading error queue\n" " -u: use udp\n" " -v: validate SND delay (usec)\n" " -V: validate ACK delay (usec)\n" @@ -601,7 +604,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:LnNp:PrRuv:V:x")) != -1) { + while ((c = getopt(argc, argv, "46bc:CFhIl:LnNp:PrRS:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -609,15 +612,15 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'b': + cfg_busy_poll = true; + break; case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; case 'C': cfg_use_cmsg = true; break; - case 'D': - cfg_no_delay = true; - break; case 'F': cfg_poll_timeout = -1; break; @@ -655,6 +658,9 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_RAW; cfg_ipproto = IPPROTO_RAW; break; + case 'S': + cfg_sleep_usec = strtoul(optarg, NULL, 10); + break; case 'u': proto_count++; cfg_proto = SOCK_DGRAM; -- cgit v1.2.3 From e64be6dea65042e116c98b2502126f48476ed8f1 Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Tue, 17 Mar 2020 12:25:08 -0700 Subject: selftests: txtimestamp: add support for epoll(). Add the following new flags: -e: use level-triggered epoll() instead of poll(). -E: use event-triggered epoll() instead of poll(). Signed-off-by: Jian Yang Acked-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 53 ++++++++++++++++++++-- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index ee060ae3d44f..f915f24db3fa 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,8 @@ static int cfg_sleep_usec = 50 * 1000; static bool cfg_loop_nodata; static bool cfg_use_cmsg; static bool cfg_use_pf_packet; +static bool cfg_use_epoll; +static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; static bool cfg_print_nsec; @@ -227,6 +230,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); } +static void __epoll(int epfd) +{ + struct epoll_event events; + int ret; + + memset(&events, 0, sizeof(events)); + ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout); + if (ret != 1) + error(1, errno, "epoll_wait"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -420,7 +434,7 @@ static void do_test(int family, unsigned int report_opt) struct msghdr msg; struct iovec iov; char *buf; - int fd, i, val = 1, total_len; + int fd, i, val = 1, total_len, epfd = 0; total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { @@ -447,6 +461,20 @@ static void do_test(int family, unsigned int report_opt) if (fd < 0) error(1, errno, "socket"); + if (cfg_use_epoll) { + struct epoll_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.data.fd = fd; + if (cfg_epollet) + ev.events |= EPOLLET; + epfd = epoll_create(1); + if (epfd <= 0) + error(1, errno, "epoll_create"); + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "epoll_ctl"); + } + /* reset expected key on each new socket */ saved_tskey = -1; @@ -557,8 +585,12 @@ static void do_test(int family, unsigned int report_opt) if (cfg_sleep_usec) usleep(cfg_sleep_usec); - if (!cfg_busy_poll) - __poll(fd); + if (!cfg_busy_poll) { + if (cfg_use_epoll) + __epoll(epfd); + else + __poll(fd); + } while (!recv_errmsg(fd)) {} } @@ -580,7 +612,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -b: busy poll to read from error queue\n" " -c N: number of packets for each test\n" " -C: use cmsg to set tstamp recording options\n" - " -F: poll() waits forever for an event\n" + " -e: use level-triggered epoll() instead of poll()\n" + " -E: use event-triggered epoll() instead of poll()\n" + " -F: poll()/epoll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -L listen on hostname and port\n" @@ -604,7 +638,8 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46bc:CFhIl:LnNp:PrRS:uv:V:x")) != -1) { + while ((c = getopt(argc, argv, + "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -621,6 +656,12 @@ static void parse_opt(int argc, char **argv) case 'C': cfg_use_cmsg = true; break; + case 'e': + cfg_use_epoll = true; + break; + case 'E': + cfg_use_epoll = true; + cfg_epollet = true; case 'F': cfg_poll_timeout = -1; break; @@ -691,6 +732,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "pass -P, -r, -R or -u, not multiple"); if (cfg_do_pktinfo && cfg_use_pf_packet) error(1, 0, "cannot ask for pktinfo over pf_packet"); + if (cfg_busy_poll && cfg_use_epoll) + error(1, 0, "pass epoll or busy_poll, not both"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); -- cgit v1.2.3 From 277bc78f3829c5e129048dc5b241f3ce4a898924 Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Tue, 17 Mar 2020 12:25:09 -0700 Subject: selftests: txtimestamp: print statistics for timestamp events. Statistics on timestamps is useful to quantify average and tail latency. Print timestamp statistics in count/avg/min/max format. Signed-off-by: Jian Yang Acked-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index f915f24db3fa..011b0da6b033 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -84,6 +84,17 @@ static struct timespec ts_usr; static int saved_tskey = -1; static int saved_tskey_type = -1; +struct timing_event { + int64_t min; + int64_t max; + int64_t total; + int count; +}; + +static struct timing_event usr_enq; +static struct timing_event usr_snd; +static struct timing_event usr_ack; + static bool test_failed; static int64_t timespec_to_ns64(struct timespec *ts) @@ -96,6 +107,27 @@ static int64_t timespec_to_us64(struct timespec *ts) return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC; } +static void init_timing_event(struct timing_event *te) +{ + te->min = INT64_MAX; + te->max = 0; + te->total = 0; + te->count = 0; +} + +static void add_timing_event(struct timing_event *te, + struct timespec *t_start, struct timespec *t_end) +{ + int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start); + + te->count++; + if (ts_delta < te->min) + te->min = ts_delta; + if (ts_delta > te->max) + te->max = ts_delta; + te->total += ts_delta; +} + static void validate_key(int tskey, int tstype) { int stepsize; @@ -187,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, case SCM_TSTAMP_SCHED: tsname = " ENQ"; validate_timestamp(&tss->ts[0], 0); + add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_SND: tsname = " SND"; validate_timestamp(&tss->ts[0], cfg_delay_snd); + add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_ACK: tsname = " ACK"; validate_timestamp(&tss->ts[0], cfg_delay_ack); + add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]); break; default: error(1, 0, "unknown timestamp type: %u", @@ -203,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +static void print_timing_event(char *name, struct timing_event *te) +{ + if (!te->count) + return; + + fprintf(stderr, " %s: count=%d", name, te->count); + fprintf(stderr, ", avg="); + __print_ts_delta_formatted((int64_t)(te->total / te->count)); + fprintf(stderr, ", min="); + __print_ts_delta_formatted(te->min); + fprintf(stderr, ", max="); + __print_ts_delta_formatted(te->max); + fprintf(stderr, "\n"); +} + /* TODO: convert to check_and_print payload once API is stable */ static void print_payload(char *data, int len) { @@ -436,6 +486,10 @@ static void do_test(int family, unsigned int report_opt) char *buf; int fd, i, val = 1, total_len, epfd = 0; + init_timing_event(&usr_enq); + init_timing_event(&usr_snd); + init_timing_event(&usr_ack); + total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); @@ -595,6 +649,10 @@ static void do_test(int family, unsigned int report_opt) while (!recv_errmsg(fd)) {} } + print_timing_event("USR-ENQ", &usr_enq); + print_timing_event("USR-SND", &usr_snd); + print_timing_event("USR-ACK", &usr_ack); + if (close(fd)) error(1, errno, "close"); -- cgit v1.2.3 From 83a9b6f639e9f6b632337f9776de17d51d969c77 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 18 Mar 2020 18:53:21 +0000 Subject: selftests/net: add definition for SOL_DCCP to fix compilation errors for old libc Many systems build/test up-to-date kernels with older libcs, and an older glibc (2.17) lacks the definition of SOL_DCCP in /usr/include/bits/socket.h (it was added in the 4.6 timeframe). Adding the definition to the test program avoids a compilation failure that gets in the way of building tools/testing/selftests/net. The test itself will work once the definition is added; either skipping due to DCCP not being configured in the kernel under test or passing, so there are no other more up-to-date glibc dependencies here it seems beyond that missing definition. Fixes: 11fb60d1089f ("selftests: net: reuseport_addr_any: add DCCP") Signed-off-by: Alan Maguire Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_addr_any.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index c6233935fed1..b8475cb29be7 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -21,6 +21,10 @@ #include #include +#ifndef SOL_DCCP +#define SOL_DCCP 269 +#endif + static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; -- cgit v1.2.3 From d5f5ee2a490547961c0b7ccdeef047c2018892bf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Feb 2020 02:54:32 -0500 Subject: tools/virtio: option to build an out of tree module Handy for testing with distro kernels. Warn that the resulting module is completely unsupported, and isn't intended for production use. Usage: make oot # builds vhost_test.ko, vhost.ko make oot-clean # cleans out files created Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- tools/virtio/Makefile | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 8e2a908115c2..f33f32f1d208 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -8,7 +8,32 @@ CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-poin vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} -.PHONY: all test mod clean + +#oot: build vhost as an out of tree module for a distro kernel +#no effort is taken to make it actually build or work, but tends to mostly work +#if the distro kernel is very close to upstream +#unsupported! this is a development tool only, don't use the +#resulting modules in production! +OOT_KSRC=/lib/modules/$$(uname -r)/build +OOT_VHOST=`pwd`/../../drivers/vhost +#Everyone depends on vhost +#Tweak the below to enable more modules +OOT_CONFIGS=\ + CONFIG_VHOST=m \ + CONFIG_VHOST_NET=n \ + CONFIG_VHOST_SCSI=n \ + CONFIG_VHOST_VSOCK=n +OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V} +oot-build: + echo "UNSUPPORTED! Don't use the resulting modules in production!" + ${OOT_BUILD} M=`pwd`/vhost_test + ${OOT_BUILD} M=${OOT_VHOST} ${OOT_CONFIGS} + +oot-clean: oot-build +oot: oot-build +oot-clean: OOT_BUILD+=clean + +.PHONY: all test mod clean vhost oot oot-clean oot-build clean: ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ vhost_test/Module.symvers vhost_test/modules.order *.d -- cgit v1.2.3 From d4953f7ef1a2e87ef732823af35361404d13fea8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 14 Mar 2020 10:03:56 -0700 Subject: perf parse-events: Fix 3 use after frees found with clang ASAN Reproducible with a clang asan build and then running perf test in particular 'Parse event definition strings'. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200314170356.62914-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 816d930d774e..15ccd193483f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1287,6 +1287,7 @@ void perf_evsel__exit(struct evsel *evsel) perf_thread_map__put(evsel->core.threads); zfree(&evsel->group_name); zfree(&evsel->name); + zfree(&evsel->pmu_name); perf_evsel__object.fini(evsel); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7dc0b096974..10107747b361 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1449,7 +1449,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats, NULL); if (evsel) { - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; return 0; } else { @@ -1497,7 +1497,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->snapshot = info.snapshot; evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; evsel->percore = config_term_percore(&evsel->config_terms); } @@ -1547,7 +1547,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { - pr_debug("%s -> %s/%s/\n", config, + pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; } -- cgit v1.2.3 From 7eec00a74720d35b6d89505350e5b08ecef376c0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Mar 2020 09:57:58 +0800 Subject: perf symbols: Consolidate symbol fixup issue After copying Arm64's perf archive with object files and perf.data file to x86 laptop, the x86's perf kernel symbol resolution fails. It outputs 'unknown' for all symbols parsing. This issue is root caused by the function elf__needs_adjust_symbols(), x86 perf tool uses one weak version, Arm64 (and powerpc) has rewritten their own version. elf__needs_adjust_symbols() decides if need to parse symbols with the relative offset address; but x86 building uses the weak function which misses to check for the elf type 'ET_DYN', so that it cannot parse symbols in Arm DSOs due to the wrong result from elf__needs_adjust_symbols(). The DSO parsing should not depend on any specific architecture perf building; e.g. x86 perf tool can parse Arm and Arm64 DSOs, vice versa. And confirmed by Naveen N. Rao that powerpc64 kernels are not being built as ET_DYN anymore and change to ET_EXEC. This patch removes the arch specific functions for Arm64 and powerpc and changes elf__needs_adjust_symbols() as a common function. In the common elf__needs_adjust_symbols(), it checks an extra condition 'ET_DYN' for elf header type. With this fixing, the Arm64 DSO can be parsed properly with x86's perf tool. Before: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 [unknown] ([kernel.kallsyms]) => ffff800010c4eaec [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec [unknown] ([kernel.kallsyms]) => ffff800010c4eb00 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 [unknown] ([kernel.kallsyms]) => ffff800010c4e780 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 [unknown] ([kernel.kallsyms]) => ffff800010c4eeac [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc [unknown] ([kernel.kallsyms]) => ffff800010c4ed80 [unknown] ([kernel.kallsyms]) After: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c coresight_timeout+0x54 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 coresight_timeout+0x68 ([kernel.kallsyms]) => ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) => ffff800010c4eb00 etm4_enable_hw+0x3e0 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 etm4_enable_hw+0x3e8 ([kernel.kallsyms]) => ffff800010c4e780 etm4_enable_hw+0x60 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 etm4_enable_hw+0x80 ([kernel.kallsyms]) => ffff800010c4eeac etm4_enable+0x2d4 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc etm4_enable+0x2e4 ([kernel.kallsyms]) => ffff800010c4ed80 etm4_enable+0x1a8 ([kernel.kallsyms]) v3: Changed to check for ET_DYN across all architectures. v2: Fixed Arm64 and powerpc native building. Reported-by: Mike Leach Signed-off-by: Leo Yan Reviewed-by: Naveen N. Rao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Allison Randal Cc: Enrico Weigelt Cc: Greg Kroah-Hartman Cc: Hendrik Brueckner Cc: John Garry Cc: Kate Stewart Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Link: http://lore.kernel.org/lkml/20200306015759.10084-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 - tools/perf/arch/arm64/util/sym-handling.c | 19 ------------------- tools/perf/arch/powerpc/util/Build | 1 - tools/perf/arch/powerpc/util/sym-handling.c | 10 ---------- tools/perf/util/symbol-elf.c | 10 ++++++++-- 5 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 tools/perf/arch/arm64/util/sym-handling.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 0a7782c61209..789956f76d85 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,5 @@ perf-y += header.o perf-y += perf_regs.o -perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c deleted file mode 100644 index 8dfa3e5229f1..000000000000 --- a/tools/perf/arch/arm64/util/sym-handling.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (C) 2015 Naveen N. Rao, IBM Corporation - */ - -#include "symbol.h" // for the elf__needs_adjust_symbols() prototype -#include - -#ifdef HAVE_LIBELF_SUPPORT -#include - -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} -#endif diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7cf0b8803097..e5c9504f8586 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,4 @@ perf-y += header.o -perf-y += sym-handling.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index abb7a12d8f93..0856b32f9e08 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,16 +10,6 @@ #include "probe-event.h" #include "probe-file.h" -#ifdef HAVE_LIBELF_SUPPORT -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} - -#endif - int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 1965aefccb02..be5b493f8284 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } -bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) { - return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL; + /* + * Usually vmlinux is an ELF file with type ET_EXEC for most + * architectures; except Arm64 kernel is linked with option + * '-share', so need to check type ET_DYN. + */ + return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; } int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, -- cgit v1.2.3 From 443bc639e518c6c4e8fc2e0456cccf3a04f6e77f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 Feb 2020 12:39:37 +0800 Subject: perf report: Print al_addr when symbol is not found For branch mode, if the symbol is not found, it prints the address. For example, 0x0000555eee0365a0 in below output. Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x0000555eee0365a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000555eee036769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000555eee036779 [.] 0x0000555eee0365ff 4.25% div div [.] 0x0000555eee0365fa [.] 0x0000555eee036760 But it's not very easy to understand what the instructions are in the binary. So this patch uses the al_addr instead. With this patch, the output is Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x00000000000005a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000000000000769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000000000000779 [.] 0x00000000000005ff 4.25% div div [.] 0x00000000000005fa [.] 0x0000000000000760 Now we can use objdump to dump the object starting from 0x5a0. For example, objdump -d --start-address 0x5a0 div 00000000000005a0 : 5a0: ff 25 2a 0a 20 00 jmpq *0x200a2a(%rip) # 200fd0 <__cxa_finalize@plt+0x200a20> 5a6: 68 02 00 00 00 pushq $0x2 5ab: e9 c0 ff ff ff jmpq 570 ... Committer testing: [root@seventh ~]# perf record -a -b sleep 1 [root@seventh ~]# perf report --header-only | grep cpudesc # cpudesc : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz [root@seventh ~]# perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY [root@seventh ~]# Before: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465c82 [.] 0x00007fe406465d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465ded [.] 0x00007fe406465c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465e4e [.] 0x00007fe406465de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# After: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7c82 [.] 0x00000000000f7d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7ded [.] 0x00000000000f7c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7e4e [.] 0x00000000000f7de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# Lets use -v to get full paths and then try objdump on the unresolved address: [root@seventh ~]# perf report -v --stdio --dso libsystemd-shared-241.so |& grep libsystemd-shared-241.so | tail -1 0.04% systemd-journal /usr/lib/systemd/libsystemd-shared-241.so 0x80c1a B [.] 0x0000000000080c1a 0x80a95 B [.] 0x0000000000080a95 61 [root@seventh ~]# [root@seventh ~]# objdump -d --start-address 0x00000000000f7d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 Disassembly of section .text: 00000000000f7d80 : f7d80: 41 39 11 cmp %edx,(%r9) f7d83: 0f 84 ff fe ff ff je f7c88 f7d89: 4c 8d 05 97 09 0c 00 lea 0xc0997(%rip),%r8 # 1b8727 f7d90: b9 49 00 00 00 mov $0x49,%ecx f7d95: 48 8d 15 c9 f5 0b 00 lea 0xbf5c9(%rip),%rdx # 1b7365 f7d9c: 31 ff xor %edi,%edi f7d9e: 48 8d 35 9b ff 0b 00 lea 0xbff9b(%rip),%rsi # 1b7d40 f7da5: e8 a6 d6 f4 ff callq 45450 f7daa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) f7db0: 41 56 push %r14 f7db2: 41 55 push %r13 f7db4: 41 54 push %r12 f7db6: 55 push %rbp [root@seventh ~]# If we tried the the reported address before this patch: [root@seventh ~]# objdump -d --start-address 0x00007fe406465d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 [root@seventh ~]# Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200227043939.4403-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ab0cfd790ad0..e860595576c2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -869,7 +869,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *from = &he->branch_info->from; - return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&from->ms, from->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -881,7 +882,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *to = &he->branch_info->to; - return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&to->ms, to->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); -- cgit v1.2.3 From 671aa926a90aacf553857cd7eabfa04a3c95b091 Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Fri, 6 Mar 2020 15:49:12 +0530 Subject: thermal: int340x: processor_thermal: Add Tiger Lake support Added new PCI id for Tiger Lake processor thermal device along with MMIO RAPL support. Signed-off-by: Sumeet Pawnikar Signed-off-by: Zhang Rui Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1583489952-29612-1-git-send-email-sumeet.r.pawnikar@intel.com --- .../int340x_thermal/processor_thermal_device.c | 5 +++++ tools/thermal/tmon/tmon.c | 26 ++++++++-------------- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index b1fd34516e28..297db1d2d960 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -45,6 +45,9 @@ /* JasperLake thermal reporting device */ #define PCI_DEVICE_ID_PROC_JSL_THERMAL 0x4503 +/* TigerLake thermal reporting device */ +#define PCI_DEVICE_ID_PROC_TGL_THERMAL 0x9A03 + #define DRV_NAME "proc_thermal" struct power_config { @@ -728,6 +731,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_ICL_THERMAL), .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_JSL_THERMAL)}, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_TGL_THERMAL), + .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, }, { 0, }, }; diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c index 83ec6e482f12..7eb3216a27f4 100644 --- a/tools/thermal/tmon/tmon.c +++ b/tools/thermal/tmon/tmon.c @@ -46,7 +46,7 @@ static void start_daemon_mode(void); pthread_t event_tid; pthread_mutex_t input_lock; -void usage() +void usage(void) { printf("Usage: tmon [OPTION...]\n"); printf(" -c, --control cooling device in control\n"); @@ -62,7 +62,7 @@ void usage() exit(0); } -void version() +void version(void) { printf("TMON version %s\n", VERSION); exit(EXIT_SUCCESS); @@ -70,7 +70,6 @@ void version() static void tmon_cleanup(void) { - syslog(LOG_INFO, "TMON exit cleanup\n"); fflush(stdout); refresh(); @@ -96,7 +95,6 @@ static void tmon_cleanup(void) exit(1); } - static void tmon_sig_handler(int sig) { syslog(LOG_INFO, "TMON caught signal %d\n", sig); @@ -120,7 +118,6 @@ static void tmon_sig_handler(int sig) tmon_exit = true; } - static void start_syslog(void) { if (debug_on) @@ -167,7 +164,6 @@ static void prepare_logging(void) return; } - fprintf(tmon_log, "#----------- THERMAL SYSTEM CONFIG -------------\n"); for (i = 0; i < ptdata.nr_tz_sensor; i++) { char binding_str[33]; /* size of long + 1 */ @@ -175,7 +171,7 @@ static void prepare_logging(void) memset(binding_str, 0, sizeof(binding_str)); for (j = 0; j < 32; j++) - binding_str[j] = (ptdata.tzi[i].cdev_binding & 1< 0) + else if (pid > 0) /* kill parent */ exit(EXIT_SUCCESS); @@ -366,11 +360,9 @@ static void start_daemon_mode() if ((chdir("/")) < 0) exit(EXIT_FAILURE); - sleep(10); close(STDIN_FILENO); close(STDOUT_FILENO); close(STDERR_FILENO); - } -- cgit v1.2.3 From eecda7a95646a4590ea02545d15d73cdcdf8beb1 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 6 Mar 2020 12:42:44 +0100 Subject: tools/kvm_stat: rework command line sequence and message texts Make sure command line arguments are sorted alphabetically everywhere, and adjusted existing texts for interactive command 's' to become consistent with the long form --set-delay. Throwing in some PEP8 fixes (all cosmetics) for good measure. Signed-off-by: Stefan Raspl Message-Id: <20200306114250.57585-2-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 9 +++++---- tools/kvm/kvm_stat/kvm_stat.txt | 40 ++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 4cf93110c259..05638ab59b9d 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1183,7 +1183,7 @@ class Tui(object): if not self._is_running_guest(self.stats.pid_filter): if self._gname: - try: # ...to identify the guest by name in case it's back + try: # ...to identify the guest by name in case it's back pids = self.get_pid_from_gname(self._gname) if len(pids) == 1: self._refresh_header(pids[0]) @@ -1336,8 +1336,8 @@ class Tui(object): msg = '' while True: self.screen.erase() - self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' % - DELAY_DEFAULT, curses.A_BOLD) + self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' + % DELAY_DEFAULT, curses.A_BOLD) self.screen.addstr(4, 0, msg) self.screen.addstr(2, 0, 'Change delay from %.1fs to ' % self._delay_regular) @@ -1545,7 +1545,7 @@ Interactive Commands: p filter by PID q quit r reset stats - s set update interval + s set update interval (value range: 0.1-25.5 secs) x toggle reporting of stats for individual child trace events Press any other key to refresh statistics immediately. """ % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING) @@ -1711,5 +1711,6 @@ def main(): else: batch(stats) + if __name__ == "__main__": main() diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index c057ba52364e..8e0658e79eb7 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -49,7 +49,7 @@ INTERACTIVE COMMANDS *r*:: reset stats -*s*:: set update interval +*s*:: set delay between refreshs *x*:: toggle reporting of stats for child trace events :: *Note*: The stats for the parents summarize the respective child trace @@ -64,37 +64,37 @@ OPTIONS --batch:: run in batch mode for one second --l:: ---log:: - run in logging mode (like vmstat) - --t:: ---tracepoints:: - retrieve statistics from tracepoints - -d:: --debugfs:: retrieve statistics from debugfs +-f:: +--fields=:: + fields to display (regex), "-f help" for a list of available events + +-g:: +--guest=:: + limit statistics to one virtual machine (guest name) + +-h:: +--help:: + show help message + -i:: --debugfs-include-past:: include all available data on past events for debugfs +-l:: +--log:: + run in logging mode (like vmstat) + -p:: --pid=:: limit statistics to one virtual machine (pid) --g:: ---guest=:: - limit statistics to one virtual machine (guest name) - --f:: ---fields=:: - fields to display (regex), "-f help" for a list of available events - --h:: ---help:: - show help message +-t:: +--tracepoints:: + retrieve statistics from tracepoints SEE ALSO -------- -- cgit v1.2.3 From 0e6618fba8c98223e17d57d68b7b834e1eb89612 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 6 Mar 2020 12:42:45 +0100 Subject: tools/kvm_stat: switch to argparse optparse is deprecated for a while, hence switching over to argparse (which also works with python2). As a consequence, help output has some subtle changes, the most significant one being that the options are all listed explicitly instead of a universal '[options]' indicator. Also, some of the error messages are phrased slightly different. While at it, squashed a number of minor PEP8 issues. Signed-off-by: Stefan Raspl Message-Id: <20200306114250.57585-3-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 142 +++++++++++++++++++------------------------- 1 file changed, 62 insertions(+), 80 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 05638ab59b9d..8f1874c7fd8e 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -25,7 +25,7 @@ import sys import locale import os import time -import optparse +import argparse import ctypes import fcntl import resource @@ -873,7 +873,7 @@ class Stats(object): if options.debugfs: providers.append(DebugfsProvider(options.pid, options.fields, - options.dbgfs_include_past)) + options.debugfs_include_past)) if options.tracepoints or not providers: providers.append(TracepointProvider(options.pid, options.fields)) @@ -1550,84 +1550,66 @@ Interactive Commands: Press any other key to refresh statistics immediately. """ % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING) - class PlainHelpFormatter(optparse.IndentedHelpFormatter): - def format_description(self, description): - if description: - return description + "\n" - else: - return "" - - def cb_guest_to_pid(option, opt, val, parser): - try: - pids = Tui.get_pid_from_gname(val) - except: - sys.exit('Error while searching for guest "{}". Use "-p" to ' - 'specify a pid instead?'.format(val)) - if len(pids) == 0: - sys.exit('Error: No guest by the name "{}" found'.format(val)) - if len(pids) > 1: - sys.exit('Error: Multiple processes found (pids: {}). Use "-p" ' - 'to specify the desired pid'.format(" ".join(pids))) - parser.values.pid = pids[0] - - optparser = optparse.OptionParser(description=description_text, - formatter=PlainHelpFormatter()) - optparser.add_option('-1', '--once', '--batch', - action='store_true', - default=False, - dest='once', - help='run in batch mode for one second', - ) - optparser.add_option('-i', '--debugfs-include-past', - action='store_true', - default=False, - dest='dbgfs_include_past', - help='include all available data on past events for ' - 'debugfs', - ) - optparser.add_option('-l', '--log', - action='store_true', - default=False, - dest='log', - help='run in logging mode (like vmstat)', - ) - optparser.add_option('-t', '--tracepoints', - action='store_true', - default=False, - dest='tracepoints', - help='retrieve statistics from tracepoints', - ) - optparser.add_option('-d', '--debugfs', - action='store_true', - default=False, - dest='debugfs', - help='retrieve statistics from debugfs', - ) - optparser.add_option('-f', '--fields', - action='store', - default='', - dest='fields', - help='''fields to display (regex) - "-f help" for a list of available events''', - ) - optparser.add_option('-p', '--pid', - action='store', - default=0, - type='int', - dest='pid', - help='restrict statistics to pid', - ) - optparser.add_option('-g', '--guest', - action='callback', - type='string', - dest='pid', - metavar='GUEST', - help='restrict statistics to guest by name', - callback=cb_guest_to_pid, - ) - options, unkn = optparser.parse_args(sys.argv) - if len(unkn) != 1: - sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:])) + class Guest_to_pid(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + try: + pids = Tui.get_pid_from_gname(values) + except: + sys.exit('Error while searching for guest "{}". Use "-p" to ' + 'specify a pid instead?'.format(values)) + if len(pids) == 0: + sys.exit('Error: No guest by the name "{}" found' + .format(values)) + if len(pids) > 1: + sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' + ' to specify the desired pid'.format(" ".join(pids))) + namespace.pid = pids[0] + + argparser = argparse.ArgumentParser(description=description_text, + formatter_class=argparse + .RawTextHelpFormatter) + argparser.add_argument('-1', '--once', '--batch', + action='store_true', + default=False, + help='run in batch mode for one second', + ) + argparser.add_argument('-d', '--debugfs', + action='store_true', + default=False, + help='retrieve statistics from debugfs', + ) + argparser.add_argument('-f', '--fields', + default='', + help='''fields to display (regex) +"-f help" for a list of available events''', + ) + argparser.add_argument('-g', '--guest', + type=str, + help='restrict statistics to guest by name', + action=Guest_to_pid, + ) + argparser.add_argument('-i', '--debugfs-include-past', + action='store_true', + default=False, + help='include all available data on past events for' + ' debugfs', + ) + argparser.add_argument('-l', '--log', + action='store_true', + default=False, + help='run in logging mode (like vmstat)', + ) + argparser.add_argument('-p', '--pid', + type=int, + default=0, + help='restrict statistics to pid', + ) + argparser.add_argument('-t', '--tracepoints', + action='store_true', + default=False, + help='retrieve statistics from tracepoints', + ) + options = argparser.parse_args() try: # verify that we were passed a valid regex up front re.compile(options.fields) -- cgit v1.2.3 From 3cbb394d9fb68dcd20415dce2c42b695475e9684 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 6 Mar 2020 12:42:46 +0100 Subject: tools/kvm_stat: add command line switch '-s' to set update interval This now controls both, the refresh rate of the interactive mode as well as the logging mode. Which, as a consequence, means that the default of logging mode is now 3s, too (use command line switch '-s' to adjust to your liking). Signed-off-by: Stefan Raspl Message-Id: <20200306114250.57585-4-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 46 ++++++++++++++++++++++++++++++----------- tools/kvm/kvm_stat/kvm_stat.txt | 4 ++++ 2 files changed, 38 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 8f1874c7fd8e..f47d81a18ab1 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -974,15 +974,17 @@ DELAY_DEFAULT = 3.0 MAX_GUEST_NAME_LEN = 48 MAX_REGEX_LEN = 44 SORT_DEFAULT = 0 +MIN_DELAY = 0.1 +MAX_DELAY = 25.5 class Tui(object): """Instruments curses to draw a nice text ui.""" - def __init__(self, stats): + def __init__(self, stats, opts): self.stats = stats self.screen = None self._delay_initial = 0.25 - self._delay_regular = DELAY_DEFAULT + self._delay_regular = opts.set_delay self._sorting = SORT_DEFAULT self._display_guests = 0 @@ -1282,7 +1284,8 @@ class Tui(object): ' p filter by guest name/PID', ' q quit', ' r reset stats', - ' s set update interval', + ' s set delay between refreshs (value range: ' + '%s-%s secs)' % (MIN_DELAY, MAX_DELAY), ' x toggle reporting of stats for individual child trace' ' events', 'Any other key refreshes statistics immediately') @@ -1348,11 +1351,9 @@ class Tui(object): try: if len(val) > 0: delay = float(val) - if delay < 0.1: - msg = '"' + str(val) + '": Value must be >=0.1' - continue - if delay > 25.5: - msg = '"' + str(val) + '": Value must be <=25.5' + err = is_delay_valid(delay) + if err is not None: + msg = err continue else: delay = DELAY_DEFAULT @@ -1488,7 +1489,7 @@ def batch(stats): pass -def log(stats): +def log(stats, opts): """Prints statistics as reiterating key block, multiple value blocks.""" keys = sorted(stats.get().keys()) @@ -1506,7 +1507,7 @@ def log(stats): banner_repeat = 20 while True: try: - time.sleep(1) + time.sleep(opts.set_delay) if line % banner_repeat == 0: banner() statline() @@ -1515,6 +1516,16 @@ def log(stats): break +def is_delay_valid(delay): + """Verify delay is in valid value range.""" + msg = None + if delay < MIN_DELAY: + msg = '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY + if delay > MAX_DELAY: + msg = '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY + return msg + + def get_options(): """Returns processed program arguments.""" description_text = """ @@ -1604,6 +1615,13 @@ Press any other key to refresh statistics immediately. default=0, help='restrict statistics to pid', ) + argparser.add_argument('-s', '--set-delay', + type=float, + default=DELAY_DEFAULT, + metavar='DELAY', + help='set delay between refreshs (value range: ' + '%s-%s secs)' % (MIN_DELAY, MAX_DELAY), + ) argparser.add_argument('-t', '--tracepoints', action='store_true', default=False, @@ -1675,6 +1693,10 @@ def main(): sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n') sys.exit('Specified pid does not exist.') + err = is_delay_valid(options.set_delay) + if err is not None: + sys.exit('Error: ' + err) + stats = Stats(options) if options.fields == 'help': @@ -1686,9 +1708,9 @@ def main(): sys.exit(0) if options.log: - log(stats) + log(stats, options) elif not options.once: - with Tui(stats) as tui: + with Tui(stats, options) as tui: tui.show_stats() else: batch(stats) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 8e0658e79eb7..20928057cc9e 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -92,6 +92,10 @@ OPTIONS --pid=:: limit statistics to one virtual machine (pid) +-s:: +--set-delay:: + set delay between refreshs (value range: 0.1-25.5 secs) + -t:: --tracepoints:: retrieve statistics from tracepoints -- cgit v1.2.3 From 0c794dcefbbc6b128f74b4c46c3ef49321d88735 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 6 Mar 2020 12:42:47 +0100 Subject: tools/kvm_stat: add command line switch '-c' to log in csv format Add an alternative format that can be more easily used for further processing later on. Note that we add a timestamp in the first column for both, the regular and the new csv format. Signed-off-by: Stefan Raspl Message-Id: <20200306114250.57585-5-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 63 ++++++++++++++++++++++++++++++++--------- tools/kvm/kvm_stat/kvm_stat.txt | 4 +++ 2 files changed, 53 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index f47d81a18ab1..e83fc8e868f4 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -33,6 +33,8 @@ import struct import re import subprocess from collections import defaultdict, namedtuple +from functools import reduce +from datetime import datetime VMX_EXIT_REASONS = { 'EXCEPTION_NMI': 0, @@ -1489,28 +1491,49 @@ def batch(stats): pass -def log(stats, opts): - """Prints statistics as reiterating key block, multiple value blocks.""" - keys = sorted(stats.get().keys()) - - def banner(): +class StdFormat(object): + def __init__(self, keys): + self._banner = '' for key in keys: - print(key.split(' ')[0], end=' ') - print() + self._banner += key.split(' ')[0] + ' ' - def statline(): - s = stats.get() + def get_banner(self): + return self._banner + + @staticmethod + def get_statline(keys, s): + res = '' for key in keys: - print(' %9d' % s[key].delta, end=' ') - print() + res += ' %9d' % s[key].delta + return res + + +class CSVFormat(object): + def __init__(self, keys): + self._banner = 'timestamp' + self._banner += reduce(lambda res, key: "{},{!s}".format(res, + key.split(' ')[0]), keys, '') + + def get_banner(self): + return self._banner + + @staticmethod + def get_statline(keys, s): + return reduce(lambda res, key: "{},{!s}".format(res, s[key].delta), + keys, '') + + +def log(stats, opts, frmt, keys): + """Prints statistics as reiterating key block, multiple value blocks.""" line = 0 banner_repeat = 20 while True: try: time.sleep(opts.set_delay) if line % banner_repeat == 0: - banner() - statline() + print(frmt.get_banner()) + print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + frmt.get_statline(keys, stats.get())) line += 1 except KeyboardInterrupt: break @@ -1584,6 +1607,11 @@ Press any other key to refresh statistics immediately. default=False, help='run in batch mode for one second', ) + argparser.add_argument('-c', '--csv', + action='store_true', + default=False, + help='log in csv format - requires option -l/--log', + ) argparser.add_argument('-d', '--debugfs', action='store_true', default=False, @@ -1628,6 +1656,8 @@ Press any other key to refresh statistics immediately. help='retrieve statistics from tracepoints', ) options = argparser.parse_args() + if options.csv and not options.log: + sys.exit('Error: Option -c/--csv requires -l/--log') try: # verify that we were passed a valid regex up front re.compile(options.fields) @@ -1708,7 +1738,12 @@ def main(): sys.exit(0) if options.log: - log(stats, options) + keys = sorted(stats.get().keys()) + if options.csv: + frmt = CSVFormat(keys) + else: + frmt = StdFormat(keys) + log(stats, options, frmt, keys) elif not options.once: with Tui(stats, options) as tui: tui.show_stats() diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 20928057cc9e..a97ded2aedad 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -64,6 +64,10 @@ OPTIONS --batch:: run in batch mode for one second +-c:: +--csv=:: + log in csv format - requires option -l/--log + -d:: --debugfs:: retrieve statistics from debugfs -- cgit v1.2.3 From c9b24312040edaa89acb6fb91a9a53fb4775309e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 20 Mar 2020 08:21:07 -0700 Subject: bpf: Add tests for bpf_sk_storage to bpf_tcp_ca This patch adds test to exercise the bpf_sk_storage_get() and bpf_sk_storage_delete() helper from the bpf_dctcp.c. The setup and check on the sk_storage is done immediately before and after the connect(). This patch also takes this chance to move the pthread_create() after the connect() has been done. That will remove the need of the "wait_thread" label. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200320152107.2169904-1-kafai@fb.com --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 39 +++++++++++++++++----- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 16 +++++++++ 2 files changed, 47 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 8482bbc67eec..9a8f47fc0b91 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -11,6 +11,7 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static const struct timeval timeo_sec = { .tv_sec = 10 }; static const size_t timeo_optlen = sizeof(timeo_sec); +static int expected_stg = 0xeB9F; static int stop, duration; static int settimeo(int fd) @@ -88,7 +89,7 @@ done: return NULL; } -static void do_test(const char *tcp_ca) +static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) { struct sockaddr_in6 sa6 = {}; ssize_t nr_recv = 0, bytes = 0; @@ -126,14 +127,34 @@ static void do_test(const char *tcp_ca) err = listen(lfd, 1); if (CHECK(err == -1, "listen", "errno:%d\n", errno)) goto done; - err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d\n", err)) - goto done; + + if (sk_stg_map) { + err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, + &expected_stg, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", + "err:%d errno:%d\n", err, errno)) + goto done; + } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); if (CHECK(err == -1, "connect", "errno:%d\n", errno)) - goto wait_thread; + goto done; + + if (sk_stg_map) { + int tmp_stg; + + err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, + &tmp_stg); + if (CHECK(!err || errno != ENOENT, + "bpf_map_lookup_elem(sk_stg_map)", + "err:%d errno:%d\n", err, errno)) + goto done; + } + + err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); + if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + goto done; /* recv total_bytes */ while (bytes < total_bytes && !READ_ONCE(stop)) { @@ -149,7 +170,6 @@ static void do_test(const char *tcp_ca) CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", bytes, total_bytes, nr_recv, errno); -wait_thread: WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", @@ -175,7 +195,7 @@ static void test_cubic(void) return; } - do_test("bpf_cubic"); + do_test("bpf_cubic", NULL); bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); @@ -197,7 +217,10 @@ static void test_dctcp(void) return; } - do_test("bpf_dctcp"); + do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); + CHECK(dctcp_skel->bss->stg_result != expected_stg, + "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", + dctcp_skel->bss->stg_result, expected_stg); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 127ea762a062..3fb4260570b1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -6,6 +6,7 @@ * the kernel BPF logic. */ +#include #include #include #include @@ -14,6 +15,15 @@ char _license[] SEC("license") = "GPL"; +int stg_result = 0; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + #define DCTCP_MAX_ALPHA 1024U struct dctcp { @@ -43,12 +53,18 @@ void BPF_PROG(dctcp_init, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); + int *stg; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->loss_cwnd = 0; ca->ce_state = 0; + stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0); + if (stg) { + stg_result = *stg; + bpf_sk_storage_delete(&sk_stg_map, (void *)tp); + } dctcp_reset(tp, ca); } -- cgit v1.2.3 From 24a6034acc922f1f6292636be4ec4dc3d9b4d2d7 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 21 Mar 2020 19:04:23 +0900 Subject: samples, bpf: Move read_trace_pipe to trace_helpers To reduce the reliance of trace samples (trace*_user) on bpf_load, move read_trace_pipe to trace_helpers. By moving this bpf_loader helper elsewhere, trace functions can be easily migrated to libbbpf. Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200321100424.1593964-2-danieltimlee@gmail.com --- samples/bpf/Makefile | 4 ++-- samples/bpf/bpf_load.c | 20 -------------------- samples/bpf/bpf_load.h | 1 - samples/bpf/tracex1_user.c | 1 + samples/bpf/tracex5_user.c | 1 + tools/testing/selftests/bpf/trace_helpers.c | 23 +++++++++++++++++++++++ tools/testing/selftests/bpf/trace_helpers.h | 1 + 7 files changed, 28 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 79b0fee6943b..ff0061467dd3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -64,11 +64,11 @@ fds_example-objs := fds_example.o sockex1-objs := sockex1_user.o sockex2-objs := sockex2_user.o sockex3-objs := bpf_load.o sockex3_user.o -tracex1-objs := bpf_load.o tracex1_user.o +tracex1-objs := bpf_load.o tracex1_user.o $(TRACE_HELPERS) tracex2-objs := bpf_load.o tracex2_user.o tracex3-objs := bpf_load.o tracex3_user.o tracex4-objs := bpf_load.o tracex4_user.o -tracex5-objs := bpf_load.o tracex5_user.o +tracex5-objs := bpf_load.o tracex5_user.o $(TRACE_HELPERS) tracex6-objs := bpf_load.o tracex6_user.o tracex7-objs := bpf_load.o tracex7_user.o test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 4574b1939e49..c5ad528f046e 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -665,23 +665,3 @@ int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) { return do_load_bpf_file(path, fixup_map); } - -void read_trace_pipe(void) -{ - int trace_fd; - - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); - if (trace_fd < 0) - return; - - while (1) { - static char buf[4096]; - ssize_t sz; - - sz = read(trace_fd, buf, sizeof(buf) - 1); - if (sz > 0) { - buf[sz] = 0; - puts(buf); - } - } -} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 814894a12974..4fcd258c616f 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -53,6 +53,5 @@ extern int map_data_count; int load_bpf_file(char *path); int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); -void read_trace_pipe(void); int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c index af8c20608ab5..55fddbd08702 100644 --- a/samples/bpf/tracex1_user.c +++ b/samples/bpf/tracex1_user.c @@ -4,6 +4,7 @@ #include #include #include "bpf_load.h" +#include "trace_helpers.h" int main(int ac, char **argv) { diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index c4ab91c89494..c2317b39e0d2 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -8,6 +8,7 @@ #include #include "bpf_load.h" #include +#include "trace_helpers.h" /* install fake seccomp program to enable seccomp code path inside the kernel, * so that our kprobe attached to seccomp_phase1() can be triggered diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7f989b3e4e22..4d0e913bbb22 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -4,12 +4,15 @@ #include #include #include +#include #include #include #include #include #include "trace_helpers.h" +#define DEBUGFS "/sys/kernel/debug/tracing/" + #define MAX_SYMS 300000 static struct ksym syms[MAX_SYMS]; static int sym_cnt; @@ -86,3 +89,23 @@ long ksym_get_addr(const char *name) return 0; } + +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[4096]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 0383c9b8adc1..25ef597dd03f 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,5 +12,6 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); +void read_trace_pipe(void); #endif -- cgit v1.2.3 From 0476e69f39377192d638c459d11400c6e9a6ffb0 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 23 Mar 2020 12:04:59 -0700 Subject: kunit: add --make_options The kunit.py utility builds an ARCH=um kernel and then runs it. Add optional --make_options flag to kunit.py allowing for the operator to specify extra build options. This allows use of the clang compiler for kunit: tools/testing/kunit/kunit.py run --defconfig \ --make_options CC=clang --make_options HOSTCC=clang Signed-off-by: Greg Thelen Reviewed-by: Brendan Higgins Reviewed-by: Nathan Chancellor Tested-by: David Gow Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 14 ++++++++++---- tools/testing/kunit/kunit_kernel.py | 24 ++++++++++++++---------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 650bb4cfc544..7dca74774dd2 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -24,7 +24,7 @@ KunitResult = namedtuple('KunitResult', ['status','result']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig', - 'alltests']) + 'alltests', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -49,7 +49,7 @@ def get_kernel_root_path(): def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: config_start = time.time() - success = linux.build_reconfig(request.build_dir) + success = linux.build_reconfig(request.build_dir, request.make_options) config_end = time.time() if not success: return KunitResult(KunitStatus.CONFIG_FAILURE, 'could not configure kernel') @@ -59,7 +59,8 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, build_start = time.time() success = linux.build_um_kernel(request.alltests, request.jobs, - request.build_dir) + request.build_dir, + request.make_options) build_end = time.time() if not success: return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel') @@ -125,6 +126,10 @@ def main(argv, linux=None): help='Run all KUnit tests through allyesconfig', action='store_true') + run_parser.add_argument('--make_options', + help='X=Y make option, can be repeated.', + action='append') + cli_args = parser.parse_args(argv) if cli_args.subcommand == 'run': @@ -149,7 +154,8 @@ def main(argv, linux=None): cli_args.jobs, cli_args.build_dir, cli_args.defconfig, - cli_args.alltests) + cli_args.alltests, + cli_args.make_options) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 415d8f3c4fe4..63dbda2d029f 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -40,8 +40,10 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output) - def make_olddefconfig(self, build_dir): + def make_olddefconfig(self, build_dir, make_options): command = ['make', 'ARCH=um', 'olddefconfig'] + if make_options: + command.extend(make_options) if build_dir: command += ['O=' + build_dir] try: @@ -68,8 +70,10 @@ class LinuxSourceTreeOperations(object): kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def make(self, jobs, build_dir): + def make(self, jobs, build_dir, make_options): command = ['make', 'ARCH=um', '--jobs=' + str(jobs)] + if make_options: + command.extend(make_options) if build_dir: command += ['O=' + build_dir] try: @@ -128,19 +132,19 @@ class LinuxSourceTree(object): return False return True - def build_config(self, build_dir): + def build_config(self, build_dir, make_options): kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) self._kconfig.write_to_file(kconfig_path) try: - self._ops.make_olddefconfig(build_dir) + self._ops.make_olddefconfig(build_dir, make_options) except ConfigError as e: logging.error(e) return False return self.validate_config(build_dir) - def build_reconfig(self, build_dir): + def build_reconfig(self, build_dir, make_options): """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) if os.path.exists(kconfig_path): @@ -149,19 +153,19 @@ class LinuxSourceTree(object): if not self._kconfig.is_subset_of(existing_kconfig): print('Regenerating .config ...') os.remove(kconfig_path) - return self.build_config(build_dir) + return self.build_config(build_dir, make_options) else: return True else: print('Generating .config ...') - return self.build_config(build_dir) + return self.build_config(build_dir, make_options) - def build_um_kernel(self, alltests, jobs, build_dir): + def build_um_kernel(self, alltests, jobs, build_dir, make_options): if alltests: self._ops.make_allyesconfig() try: - self._ops.make_olddefconfig(build_dir) - self._ops.make(jobs, build_dir) + self._ops.make_olddefconfig(build_dir, make_options) + self._ops.make(jobs, build_dir, make_options) except (ConfigError, BuildError) as e: logging.error(e) return False -- cgit v1.2.3 From 81573b18f26defe672a7d960f9af9ac2c97f324d Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Mon, 23 Mar 2020 16:24:04 +0200 Subject: selftests/net/forwarding: add Makefile to install tests Add missing Makefile for net/forwarding tests and include it to the targets list, otherwise forwarding tests are not installed in case of cross-compilation. Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/forwarding/Makefile | 75 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tools/testing/selftests/net/forwarding/Makefile (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6ec503912bea..b93fa645ee54 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -33,6 +33,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter TARGETS += networking/timestamping diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile new file mode 100644 index 000000000000..44616103508b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_igmp.sh \ + bridge_port_isolation.sh \ + bridge_sticky_fdb.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_unaware.sh \ + devlink_lib.sh \ + ethtool_lib.sh \ + ethtool.sh \ + fib_offload_lib.sh \ + forwarding.config.sample \ + gre_inner_v4_multipath.sh \ + gre_inner_v6_multipath.sh \ + gre_multipath.sh \ + ip6gre_inner_v4_multipath.sh \ + ip6gre_inner_v6_multipath.sh \ + ipip_flat_gre_key.sh \ + ipip_flat_gre_keys.sh \ + ipip_flat_gre.sh \ + ipip_hier_gre_key.sh \ + ipip_hier_gre_keys.sh \ + ipip_hier_gre.sh \ + ipip_lib.sh \ + lib.sh \ + loopback.sh \ + mirror_gre_bound.sh \ + mirror_gre_bridge_1d.sh \ + mirror_gre_bridge_1d_vlan.sh \ + mirror_gre_bridge_1q_lag.sh \ + mirror_gre_bridge_1q.sh \ + mirror_gre_changes.sh \ + mirror_gre_flower.sh \ + mirror_gre_lag_lacp.sh \ + mirror_gre_lib.sh \ + mirror_gre_neigh.sh \ + mirror_gre_nh.sh \ + mirror_gre.sh \ + mirror_gre_topo_lib.sh \ + mirror_gre_vlan_bridge_1q.sh \ + mirror_gre_vlan.sh \ + mirror_lib.sh \ + mirror_topo_lib.sh \ + mirror_vlan.sh \ + router_bridge.sh \ + router_bridge_vlan.sh \ + router_broadcast.sh \ + router_mpath_nh.sh \ + router_multicast.sh \ + router_multipath.sh \ + router.sh \ + router_vid_1.sh \ + sch_ets_core.sh \ + sch_ets.sh \ + sch_ets_tests.sh \ + sch_tbf_core.sh \ + sch_tbf_etsprio.sh \ + sch_tbf_ets.sh \ + sch_tbf_prio.sh \ + sch_tbf_root.sh \ + tc_actions.sh \ + tc_chains.sh \ + tc_common.sh \ + tc_flower_router.sh \ + tc_flower.sh \ + tc_shblocks.sh \ + tc_vlan_modify.sh \ + vxlan_asymmetric.sh \ + vxlan_bridge_1d_port_8472.sh \ + vxlan_bridge_1d.sh \ + vxlan_bridge_1q_port_8472.sh \ + vxlan_bridge_1q.sh \ + vxlan_symmetric.sh + +include ../../lib.mk -- cgit v1.2.3 From 7b0a0dcb64705d1dbed46d33c9810251667469b9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 Feb 2020 12:39:38 +0800 Subject: perf report: Support interactive annotation of code without symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For perf report on stripped binaries it is currently impossible to do annotation. The annotation state is all tied to symbols, but there are either no symbols, or symbols are not covering all the code. We should support the annotation functionality even without symbols. This patch fakes a symbol and the symbol name is the string of address. After that, we just follow current annotation working flow. For example, 1. perf report Overhead Command Shared Object Symbol 20.67% div libc-2.27.so [.] __random_r 17.29% div libc-2.27.so [.] __random 10.59% div div [.] 0x0000000000000628 9.25% div div [.] 0x0000000000000612 6.11% div div [.] 0x0000000000000645 2. Select the line of "10.59% div div [.] 0x0000000000000628" and ENTER. Annotate 0x0000000000000628 Zoom into div thread Zoom into div DSO (use the 'k' hotkey to zoom directly into the kernel) Browse map details Run scripts for samples of symbol [0x0000000000000628] Run scripts for all samples Switch to another data file in PWD Exit 3. Select the "Annotate 0x0000000000000628" and ENTER. Percent│ │ │ │ Disassembly of section .text: │ │ 0000000000000628 <.text+0x68>: │ divsd %xmm4,%xmm0 │ divsd %xmm3,%xmm1 │ movsd (%rsp),%xmm2 │ addsd %xmm1,%xmm0 │ addsd %xmm2,%xmm0 │ movsd %xmm0,(%rsp) Now we can see the dump of object starting from 0x628. v5: --- Remove the hotkey 'a' implementation from this patch. It will be moved to a separate patch. v4: --- 1. Support the hotkey 'a'. When we press 'a' on address, now it supports the annotation. 2. Change the patch title from "Support interactive annotation of code without symbols" to "perf report: Support interactive annotation of code without symbols" v3: --- Keep just the ANNOTATION_DUMMY_LEN, and remove the opts->annotate_dummy_len since it's the "maybe in future we will provide" feature. v2: --- Fix a crash issue when annotating an address in "unknown" object. The steps to reproduce this issue: perf record -e cycles:u ls perf report 75.29% ls ld-2.27.so [.] do_lookup_x 23.64% ls ld-2.27.so [.] __GI___tunables_init 1.04% ls [unknown] [k] 0xffffffff85c01210 0.03% ls ld-2.27.so [.] _start When annotating 0xffffffff85c01210, the crash happens. v2 adds checking for ms->map in add_annotate_opt(). If the object is "unknown", ms->map is NULL. Committer notes: Renamed new_annotate_sym() to symbol__new_unresolved(). Use PRIx64 to fix this issue in some 32-bit arches: ui/browsers/hists.c: In function 'symbol__new_unresolved': ui/browsers/hists.c:2474:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 5 has type 'u64' {aka 'long long unsigned int'} [-Werror=format=] snprintf(name, sizeof(name), "%-#.*lx", BITS_PER_LONG / 4, addr); ~~~~~~^ ~~~~ %-#.*llx Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200227043939.4403-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 43 ++++++++++++++++++++++++++++++++++++------ tools/perf/util/annotate.h | 1 + 2 files changed, 38 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f36dee499320..d0f9745856fd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2465,13 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) return 0; } +static struct symbol *symbol__new_unresolved(u64 addr, struct map *map) +{ + struct annotated_source *src; + struct symbol *sym; + char name[64]; + + snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr); + + sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name); + if (sym) { + src = symbol__hists(sym, 1); + if (!src) { + symbol__delete(sym); + return NULL; + } + + dso__insert_symbol(map->dso, sym); + } + + return sym; +} + static int add_annotate_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, - struct map_symbol *ms) + struct map_symbol *ms, + u64 addr) { - if (ms->sym == NULL || ms->map->dso->annotate_warned || - symbol__annotation(ms->sym)->src == NULL) + if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned) + return 0; + + if (!ms->sym) + ms->sym = symbol__new_unresolved(addr, ms->map); + + if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL) return 0; if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0) @@ -3219,17 +3247,20 @@ do_hotkey: // key came straight from options ui__popup_menu() nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->from.ms); + &bi->from.ms, + bi->from.al_addr); if (bi->to.ms.sym != bi->from.ms.sym) nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->to.ms); + &bi->to.ms, + bi->to.al_addr); } else { nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection); + browser->selection, + browser->he_selection->ip); } skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 07c775938d46..2d88069d6428 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 #define ANNOTATION__AVG_IPC_WIDTH 36 +#define ANNOTATION_DUMMY_LEN 256 struct annotation_options { bool hide_src_code, -- cgit v1.2.3 From ec0479a63b7657e57665a9fc81d11a99131cfc62 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 Feb 2020 12:39:39 +0800 Subject: perf report/top TUI: Support hotkey 'a' for annotation of unresolved addresses In previous patch, we have supported the annotation functionality even without symbols. For this patch, it supports the hotkey 'a' on address in report view. Note that, for branch mode, we only support the annotation for "branch to" address. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200227043939.4403-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 46 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d0f9745856fd..1103a019d83f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3061,21 +3061,45 @@ do_hotkey: // key came straight from options ui__popup_menu() continue; } - if (browser->selection == NULL || - browser->selection->sym == NULL || - browser->selection->map->dso->annotate_warned) + if (!browser->selection || + !browser->selection->map || + !browser->selection->map->dso || + browser->selection->map->dso->annotate_warned) { continue; + } - if (symbol__annotation(browser->selection->sym)->src == NULL) { - ui_browser__warning(&browser->b, delay_secs * 2, - "No samples for the \"%s\" symbol.\n\n" - "Probably appeared just in a callchain", - browser->selection->sym->name); - continue; + if (!browser->selection->sym) { + if (!browser->he_selection) + continue; + + if (sort__mode == SORT_MODE__BRANCH) { + bi = browser->he_selection->branch_info; + if (!bi || !bi->to.ms.map) + continue; + + actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map); + actions->ms.map = bi->to.ms.map; + } else { + actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip, + browser->selection->map); + actions->ms.map = browser->selection->map; + } + + if (!actions->ms.sym) + continue; + } else { + if (symbol__annotation(browser->selection->sym)->src == NULL) { + ui_browser__warning(&browser->b, delay_secs * 2, + "No samples for the \"%s\" symbol.\n\n" + "Probably appeared just in a callchain", + browser->selection->sym->name); + continue; + } + + actions->ms.map = browser->selection->map; + actions->ms.sym = browser->selection->sym; } - actions->ms.map = browser->selection->map; - actions->ms.sym = browser->selection->sym; do_annotate(browser, actions); continue; case 'P': -- cgit v1.2.3 From 429a5f9d89fc52256b2a59cd1277afbfafb739b3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 20 Feb 2020 09:36:14 +0800 Subject: perf report: Allow specifying event to be used as sort key in --group output When performing "perf report --group", it shows the event group information together. By default, the output is sorted by the first event in group. It would be nice for user to select any event for sorting. This patch introduces a new option "--group-sort-idx" to sort the output by the event at the index n in event group. For example, Before: # perf report --group --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 1.56% 0.01% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494ce 1.56% 0.00% 0.00% 0.00% mgen [kernel.kallsyms] [k] task_tick_fair 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 0.00% 0.03% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] g_main_context_check 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] apic_timer_interrupt ... After: # perf report --group --stdio --group-sort-idx 3 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 0.00% 0.00% 0.00% 0.06% swapper [kernel.kallsyms] [k] hrtimer_start_range_ns 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] update_curr 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] native_apic_msr_eoi_write 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] __update_load_avg_se 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] scheduler_tick Now the output is sorted by the fourth event in group. v7: --- Rebase to latest perf/core, no other change. v4: --- 1. Update Documentation/perf-report.txt to mention '--group-sort-idx' support multiple groups with different amount of events and it should be used on grouped events. 2. Update __hpp__group_sort_idx(), just return when the idx is out of limit. 3. Return failure on symbol_conf.group_sort_idx && !session->evlist->nr_groups. So now we don't need to use together with --group. v3: --- Refine the code in __hpp__group_sort_idx(). Before: for (i = 1; i < nr_members; i++) { if (i == idx) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) goto out; } } After: if (idx >= 1 && idx < nr_members) { ret = field_cmp(fields_a[idx], fields_b[idx]); if (ret) goto out; } Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200220013616.19916-2-yao.jin@linux.intel.com [ Renamed pair_fields_alloc() to hist_entry__new_pair() and combined decl + assignment of vars ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 5 ++ tools/perf/builtin-report.c | 10 ++++ tools/perf/ui/hist.c | 93 ++++++++++++++++++++++++++------ tools/perf/util/symbol_conf.h | 1 + 4 files changed, 94 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index bd0a029d4c08..e56e3f1344a7 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -377,6 +377,11 @@ OPTIONS Show event group information together. It forces group output also if there are no groups defined in data file. +--group-sort-idx:: + Sort the output by the event at the index n in group. If n is invalid, + sort by the first event. It can support multiple groups with different + amount of events. WARNING: This should be used on grouped events. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5f4045df76f4..fa21c5ae3a51 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1227,6 +1227,10 @@ int cmd_report(int argc, const char **argv) "Show a column with the sum of periods"), OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, "Show event group information together"), + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, + "Sort the output by the event at the index n in group. " + "If n is invalid, sort by the first event. " + "WARNING: should be used on grouped events."), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", parse_branch_mode), @@ -1369,6 +1373,12 @@ repeat: setup_forced_leader(&report, session->evlist); + if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) { + parse_options_usage(NULL, options, "group-sort-idx", 0); + ret = -EINVAL; + goto error; + } + if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f73675500061..025f4c7f96bf 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -151,15 +151,90 @@ static int field_cmp(u64 field_a, u64 field_b) return 0; } +static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int nr_members, + u64 **fields_a, u64 **fields_b) +{ + u64 *fa = calloc(nr_members, sizeof(*fa)), + *fb = calloc(nr_members, sizeof(*fb)); + struct hist_entry *pair; + + if (!fa || !fb) + goto out_free; + + list_for_each_entry(pair, &a->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fa[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + list_for_each_entry(pair, &b->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fb[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + *fields_a = fa; + *fields_b = fb; + return 0; +out_free: + free(fa); + free(fb); + *fields_a = *fields_b = NULL; + return -1; +} + +static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int idx) +{ + struct evsel *evsel = hists_to_evsel(a->hists); + u64 *fields_a, *fields_b; + int cmp, nr_members, ret, i; + + cmp = field_cmp(get_field(a), get_field(b)); + if (!perf_evsel__is_group_event(evsel)) + return cmp; + + nr_members = evsel->core.nr_members; + if (idx < 1 || idx >= nr_members) + return cmp; + + ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (ret) { + ret = cmp; + goto out; + } + + ret = field_cmp(fields_a[idx], fields_b[idx]); + if (ret) + goto out; + + for (i = 1; i < nr_members; i++) { + if (i != idx) { + ret = field_cmp(fields_a[i], fields_b[i]); + if (ret) + goto out; + } + } + +out: + free(fields_a); + free(fields_b); + + return ret; +} + static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, hpp_field_fn get_field) { s64 ret; int i, nr_members; struct evsel *evsel; - struct hist_entry *pair; u64 *fields_a, *fields_b; + if (symbol_conf.group_sort_idx && symbol_conf.event_group) { + return __hpp__group_sort_idx(a, b, get_field, + symbol_conf.group_sort_idx); + } + ret = field_cmp(get_field(a), get_field(b)); if (ret || !symbol_conf.event_group) return ret; @@ -169,22 +244,10 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; nr_members = evsel->core.nr_members; - fields_a = calloc(nr_members, sizeof(*fields_a)); - fields_b = calloc(nr_members, sizeof(*fields_b)); - - if (!fields_a || !fields_b) + i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (i) goto out; - list_for_each_entry(pair, &a->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_a[perf_evsel__group_idx(evsel)] = get_field(pair); - } - - list_for_each_entry(pair, &b->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_b[perf_evsel__group_idx(evsel)] = get_field(pair); - } - for (i = 1; i < nr_members; i++) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 10f1ec3e0349..b916afb95ec5 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -73,6 +73,7 @@ struct symbol_conf { const char *symfs; int res_sample; int pad_output_len_dso; + int group_sort_idx; }; extern struct symbol_conf symbol_conf; -- cgit v1.2.3 From 5e3b810aac49e960c80529e2c9aa8c101c5848ce Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 20 Feb 2020 09:36:15 +0800 Subject: perf report: Support a new key to reload the browser Sometimes we may need to reload the browser to update the output since some options are changed. This patch creates a new key K_RELOAD. Once the __cmd_report() returns K_RELOAD, it would repeat the whole process, such as, read samples from data file, sort the data and display in the browser. v5: --- 1. Fix the 'make NO_SLANG=1' error. Define K_RELOAD in util/hist.h. 2. Skip setup_sorting() in repeat path if last key is K_RELOAD. v4: --- Need to quit in perf_evsel_menu__run if key is K_RELOAD. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200220013616.19916-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 +++--- tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/keysyms.h | 1 + tools/perf/util/hist.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa21c5ae3a51..ea673b7eb3f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -635,7 +635,7 @@ static int report__browse_hists(struct report *rep) * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. */ - if (ret != K_SWITCH_INPUT_DATA) + if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD) ret = 0; break; case 2: @@ -1480,7 +1480,7 @@ repeat: sort_order = sort_tmp; } - if ((last_key != K_SWITCH_INPUT_DATA) && + if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) && (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); @@ -1559,7 +1559,7 @@ repeat: sort__setup_elide(stdout); ret = __cmd_report(&report); - if (ret == K_SWITCH_INPUT_DATA) { + if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) { perf_session__delete(session); last_key = K_SWITCH_INPUT_DATA; goto repeat; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1103a019d83f..9f3401fd2cf6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3495,6 +3495,7 @@ browse_hists: pos = perf_evsel__prev(pos); goto browse_hists; case K_SWITCH_INPUT_DATA: + case K_RELOAD: case 'q': case CTRL('c'): goto out; diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h index fbfac29077f2..04cc4e5c031f 100644 --- a/tools/perf/ui/keysyms.h +++ b/tools/perf/ui/keysyms.h @@ -25,5 +25,6 @@ #define K_ERROR -2 #define K_RESIZE -3 #define K_SWITCH_INPUT_DATA -4 +#define K_RELOAD -5 #endif /* _PERF_KEYSYMS_H_ */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0aa63aeb58ec..bb994e030495 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -536,6 +536,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 +#define K_RELOAD -4000 #endif unsigned int hists__sort_list_width(struct hists *hists); -- cgit v1.2.3 From dbddf17474411f5725fc76fc7e507410f0d4077f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 20 Feb 2020 09:36:16 +0800 Subject: perf report/top TUI: Support hotkeys to let user select any event for sorting When performing "perf report --group", it shows the event group information together. In previous patch, we have supported a new option "--group-sort-idx" to sort the output by the event at the index n in event group. It would be nice if we can use a hotkey in browser to select a event to sort. For example, # perf report --group Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, ... Overhead Command Shared Object Symbol 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 1.56% 0.01% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494ce 1.56% 0.00% 0.00% 0.00% mgen [kernel.kallsyms] [k] task_tick_fair 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 0.00% 0.03% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] g_main_context_check 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] check_preempt_curr When user press hotkey '3' (event index, starting from 0), it indicates to sort output by the forth event in group. Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, ... Overhead Command Shared Object Symbol 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 0.00% 0.00% 0.00% 0.06% swapper [kernel.kallsyms] [k] hrtimer_start_range_ns 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] update_curr 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] native_apic_msr_eoi_write 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] __update_load_avg_se v6: --- Jiri provided a good improvement to eliminate unneeded refresh. This improvement is added to v6. v2: --- 1. Report warning at helpline when index is invalid. 2. Report warning at helpline when it's not group event. 3. Use "case '0' ... '9'" to refine the code 4. Split K_RELOAD implementation to another patch. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200220013616.19916-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 9f3401fd2cf6..95ac5e2ea949 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2992,7 +2992,8 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, "s Switch to another data file in PWD\n" "t Zoom into current Thread\n" "V Verbose (DSO names in callchains, etc)\n" - "/ Filter symbol by name"; + "/ Filter symbol by name\n" + "0-9 Sort by event n in group"; static const char top_help[] = HIST_BROWSER_HELP_COMMON "P Print histograms to perf.hist.N\n" "t Zoom into current Thread\n" @@ -3053,6 +3054,31 @@ do_hotkey: // key came straight from options ui__popup_menu() * go to the next or previous */ goto out_free_stack; + case '0' ... '9': + if (!symbol_conf.event_group || + evsel->core.nr_members < 2) { + snprintf(buf, sizeof(buf), + "Sort by index only available with group events!"); + helpline = buf; + continue; + } + + if (key - '0' == symbol_conf.group_sort_idx) + continue; + + symbol_conf.group_sort_idx = key - '0'; + + if (symbol_conf.group_sort_idx >= evsel->core.nr_members) { + snprintf(buf, sizeof(buf), + "Max event group index to sort is %d (index from 0 to %d)", + evsel->core.nr_members - 1, + evsel->core.nr_members - 1); + helpline = buf; + continue; + } + + key = K_RELOAD; + goto out_free_stack; case 'a': if (!hists__has(hists, sym)) { ui_browser__warning(&browser->b, delay_secs * 2, -- cgit v1.2.3 From d13e9e413e5b470c4364bbbce559383081201811 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Feb 2020 15:16:14 +0800 Subject: perf stat: Align the output for interval aggregation mode There is a slight misalignment in -A -I output. For example: # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000440863 CPU0 1,068,388 cpu/event=cpu-cycles/ 1.000440863 CPU1 875,954 cpu/event=cpu-cycles/ 1.000440863 CPU2 3,072,538 cpu/event=cpu-cycles/ 1.000440863 CPU3 4,026,870 cpu/event=cpu-cycles/ 1.000440863 CPU4 5,919,630 cpu/event=cpu-cycles/ 1.000440863 CPU5 2,714,260 cpu/event=cpu-cycles/ 1.000440863 CPU6 2,219,240 cpu/event=cpu-cycles/ 1.000440863 CPU7 1,299,232 cpu/event=cpu-cycles/ The value of counts is not aligned with the column "counts" and the event name is not aligned with the column "events". With this patch, the output is, # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000423009 CPU0 997,421 cpu/event=cpu-cycles/ 1.000423009 CPU1 1,422,042 cpu/event=cpu-cycles/ 1.000423009 CPU2 484,651 cpu/event=cpu-cycles/ 1.000423009 CPU3 525,791 cpu/event=cpu-cycles/ 1.000423009 CPU4 1,370,100 cpu/event=cpu-cycles/ 1.000423009 CPU5 442,072 cpu/event=cpu-cycles/ 1.000423009 CPU6 205,643 cpu/event=cpu-cycles/ 1.000423009 CPU7 1,302,250 cpu/event=cpu-cycles/ Now output is aligned. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200218071614.25736-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 76c6052b12e2..9e757d18d713 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -115,11 +115,11 @@ static void aggr_printout(struct perf_stat_config *config, fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), - config->csv_output ? 0 : -5, + config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id), config->csv_sep); } else { - fprintf(config->output, "CPU%*d%s ", - config->csv_output ? 0 : -5, + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], config->csv_sep); } -- cgit v1.2.3 From 58fc90fda0cc983c11c5290c7a9e992b08ac4a5c Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 21 Feb 2020 15:41:21 +0530 Subject: perf metricgroup: Fix printing event names of metric group with multiple events incase of overlapping events Commit f01642e4912b ("perf metricgroup: Support multiple events for metricgroup") introduced support for multiple events in a metric group. But with the current upstream, metric events names are not printed properly incase we try to run multiple metric groups with overlapping event. With current upstream version, incase of overlapping metric events issue is, we always start our comparision logic from start. So, the events which already matched with some metric group also take part in comparision logic. Because of that when we have overlapping events, we end up matching current metric group event with already matched one. For example, in skylake machine we have metric event CoreIPC and Instructions. Both of them need 'inst_retired.any' event value. As events in Instructions is subset of events in CoreIPC, they endup in pointing to same 'inst_retired.any' value. In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 1,254,992,790 inst_retired.any # 1254992790.0 Instructions # 1.3 CoreIPC 977,172,805 cycles 1,254,992,756 inst_retired.any 1.000802596 seconds time elapsed command:# sudo ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 948,650 uops_retired.retire_slots 866,182 inst_retired.any # 0.7 IPC 866,182 inst_retired.any 1,175,671 cpu_clk_unhalted.thread Patch fixes the issue by adding a new bool pointer 'evlist_used' to keep track of events which already matched with some group by setting it true. So, we skip all used events in list when we start comparision logic. Patch also make some changes in comparision logic, incase we get a match miss, we discard the whole match and start again with first event id in metric event. With this patch: In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 3,348,415 inst_retired.any # 0.3 CoreIPC 11,779,026 cycles 3,348,381 inst_retired.any # 3348381.0 Instructions 1.001649056 seconds time elapsed command:# ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 1,023,148 uops_retired.retire_slots # 1.1 UPI 924,976 inst_retired.any 924,976 inst_retired.any # 0.6 IPC 1,489,414 cpu_clk_unhalted.thread 1.003064672 seconds time elapsed Signed-off-by: Kajol Jain Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Anju T Sudhakar Cc: Jin Yao Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200221101121.28920-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 49 ++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c3a8c701609a..926449a7cdbf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -95,13 +95,16 @@ struct egroup { static struct evsel *find_evsel_group(struct evlist *perf_evlist, const char **ids, int idnum, - struct evsel **metric_events) + struct evsel **metric_events, + bool *evlist_used) { struct evsel *ev; - int i = 0; + int i = 0, j = 0; bool leader_found; evlist__for_each_entry (perf_evlist, ev) { + if (evlist_used[j++]) + continue; if (!strcmp(ev->name, ids[i])) { if (!metric_events[i]) metric_events[i] = ev; @@ -109,22 +112,17 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, if (i == idnum) break; } else { - if (i + 1 == idnum) { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; - } - - if (!strcmp(ev->name, ids[i])) - metric_events[i] = ev; - else { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; + /* Discard the whole match and start again */ + i = 0; + memset(metric_events, 0, + sizeof(struct evsel *) * idnum); + + if (!strcmp(ev->name, ids[i])) { + if (!metric_events[i]) + metric_events[i] = ev; + i++; + if (i == idnum) + break; } } } @@ -146,7 +144,10 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, !strcmp(ev->name, metric_events[i]->name)) { ev->metric_leader = metric_events[i]; } + j++; } + ev = metric_events[i]; + evlist_used[ev->idx] = true; } return metric_events[0]; @@ -162,6 +163,13 @@ static int metricgroup__setup_events(struct list_head *groups, int ret = 0; struct egroup *eg; struct evsel *evsel; + bool *evlist_used; + + evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); + if (!evlist_used) { + ret = -ENOMEM; + return ret; + } list_for_each_entry (eg, groups, nd) { struct evsel **metric_events; @@ -172,7 +180,7 @@ static int metricgroup__setup_events(struct list_head *groups, break; } evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, - metric_events); + metric_events, evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", eg->metric_name, eg->metric_expr); @@ -196,6 +204,9 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_events = metric_events; list_add(&expr->nd, &me->head); } + + free(evlist_used); + return ret; } -- cgit v1.2.3 From c5f18e9e94bad244115dc5e47f27bd061ecc5552 Mon Sep 17 00:00:00 2001 From: Vijay Thakkar Date: Wed, 18 Mar 2020 15:00:00 -0400 Subject: perf vendor events amd: Restrict model detection for zen1 based processors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes the previous blanket detection of AMD Family 17h processors to be more specific to Zen1 core based products only by replacing model detection regex pattern [[:xdigit:]]+ with ([12][0-9A-F]|[0-9A-F]), restricting to models 0 though 2f only. This change is required to allow for the addition of separate PMU events for Zen2 core based models in the following patches as those belong to family 17h but have different PMCs. Current PMU events directory has also been renamed to "amdzen1" from "amdfam17h" to reflect this specificity. Note that although this change does not break PMU counters for existing zen1 based systems, it does disable the current set of counters for zen2 based systems. Counters for zen2 have been added in the following patches in this patchset. Signed-off-by: Vijay Thakkar Acked-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Jon Grimm Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200318190002.307290-2-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/amdfam17h/branch.json | 12 - .../perf/pmu-events/arch/x86/amdfam17h/cache.json | 329 --------------------- tools/perf/pmu-events/arch/x86/amdfam17h/core.json | 134 --------- .../arch/x86/amdfam17h/floating-point.json | 168 ----------- .../perf/pmu-events/arch/x86/amdfam17h/memory.json | 162 ---------- .../perf/pmu-events/arch/x86/amdfam17h/other.json | 65 ---- tools/perf/pmu-events/arch/x86/amdzen1/branch.json | 12 + tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 329 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/core.json | 134 +++++++++ .../arch/x86/amdzen1/floating-point.json | 168 +++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/memory.json | 162 ++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/other.json | 65 ++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 13 files changed, 871 insertions(+), 871 deletions(-) delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/branch.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/cache.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/core.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/memory.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/other.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/other.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json deleted file mode 100644 index 93ddfd8053ca..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "EventName": "bp_l1_btb_correct", - "EventCode": "0x8a", - "BriefDescription": "L1 BTB Correction." - }, - { - "EventName": "bp_l2_btb_correct", - "EventCode": "0x8b", - "BriefDescription": "L2 BTB Correction." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json deleted file mode 100644 index 6221a840fcea..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json +++ /dev/null @@ -1,329 +0,0 @@ -[ - { - "EventName": "ic_fw32", - "EventCode": "0x80", - "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." - }, - { - "EventName": "ic_fw32_miss", - "EventCode": "0x81", - "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." - }, - { - "EventName": "ic_cache_fill_l2", - "EventCode": "0x82", - "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." - }, - { - "EventName": "ic_cache_fill_sys", - "EventCode": "0x83", - "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." - }, - { - "EventName": "bp_l1_tlb_miss_l2_hit", - "EventCode": "0x84", - "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." - }, - { - "EventName": "bp_l1_tlb_miss_l2_miss", - "EventCode": "0x85", - "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." - }, - { - "EventName": "bp_snp_re_sync", - "EventCode": "0x86", - "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." - }, - { - "EventName": "ic_fetch_stall.ic_stall_any", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" - }, - { - "EventName": "ic_fetch_stall.ic_stall_dq_empty", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" - }, - { - "EventName": "ic_fetch_stall.ic_stall_back_pressure", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" - }, - { - "EventName": "ic_cache_inval.l2_invalidating_probe", - "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", - "UMask": "0x2" - }, - { - "EventName": "ic_cache_inval.fill_invalidated", - "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to overwriting fill response.", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", - "UMask": "0x1" - }, - { - "EventName": "bp_tlb_rel", - "EventCode": "0x99", - "BriefDescription": "The number of ITLB reload requests." - }, - { - "EventName": "l2_request_g1.rd_blk_l", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x80" - }, - { - "EventName": "l2_request_g1.rd_blk_x", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x40" - }, - { - "EventName": "l2_request_g1.ls_rd_blk_c_s", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x20" - }, - { - "EventName": "l2_request_g1.cacheable_ic_read", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x10" - }, - { - "EventName": "l2_request_g1.change_to_x", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x8" - }, - { - "EventName": "l2_request_g1.prefetch_l2", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x4" - }, - { - "EventName": "l2_request_g1.l2_hw_pf", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x2" - }, - { - "EventName": "l2_request_g1.other_requests", - "EventCode": "0x60", - "BriefDescription": "Events covered by l2_request_g2.", - "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", - "UMask": "0x1" - }, - { - "EventName": "l2_request_g2.group1", - "EventCode": "0x61", - "BriefDescription": "All Group 1 commands not in unit0.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", - "UMask": "0x80" - }, - { - "EventName": "l2_request_g2.ls_rd_sized", - "EventCode": "0x61", - "BriefDescription": "RdSized, RdSized32, RdSized64.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", - "UMask": "0x40" - }, - { - "EventName": "l2_request_g2.ls_rd_sized_nc", - "EventCode": "0x61", - "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", - "UMask": "0x20" - }, - { - "EventName": "l2_request_g2.ic_rd_sized", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x10" - }, - { - "EventName": "l2_request_g2.ic_rd_sized_nc", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x8" - }, - { - "EventName": "l2_request_g2.smc_inval", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x4" - }, - { - "EventName": "l2_request_g2.bus_locks_originator", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x2" - }, - { - "EventName": "l2_request_g2.bus_locks_responses", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x1" - }, - { - "EventName": "l2_latency.l2_cycles_waiting_on_fills", - "EventCode": "0x62", - "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" - }, - { - "EventName": "l2_wcb_req.wcb_write", - "EventCode": "0x63", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", - "BriefDescription": "LS to L2 WCB write requests.", - "UMask": "0x40" - }, - { - "EventName": "l2_wcb_req.wcb_close", - "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB close requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", - "UMask": "0x20" - }, - { - "EventName": "l2_wcb_req.zero_byte_store", - "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB zero byte store requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" - }, - { - "EventName": "l2_wcb_req.cl_zero", - "EventCode": "0x63", - "PublicDescription": "LS to L2 WCB cache line zeroing requests.", - "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_cs", - "EventCode": "0x64", - "BriefDescription": "LS ReadBlock C/S Hit.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", - "UMask": "0x80" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", - "EventCode": "0x64", - "BriefDescription": "LS Read Block L Hit X.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", - "UMask": "0x40" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", - "EventCode": "0x64", - "BriefDescription": "LsRdBlkL Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", - "UMask": "0x20" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_x", - "EventCode": "0x64", - "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "UMask": "0x10" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_c", - "EventCode": "0x64", - "BriefDescription": "LS Read Block C S L X Change to X Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", - "UMask": "0x8" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_hit_x", - "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Exclusive Stale.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", - "UMask": "0x4" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_hit_s", - "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", - "UMask": "0x2" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_miss", - "EventCode": "0x64", - "BriefDescription": "IC Fill Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", - "UMask": "0x1" - }, - { - "EventName": "l2_fill_pending.l2_fill_busy", - "EventCode": "0x6d", - "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" - }, - { - "EventName": "l3_request_g1.caching_l3_cache_accesses", - "EventCode": "0x01", - "BriefDescription": "Caching: L3 cache accesses", - "UMask": "0x80", - "Unit": "L3PMC" - }, - { - "EventName": "l3_lookup_state.all_l3_req_typs", - "EventCode": "0x04", - "BriefDescription": "All L3 Request Types", - "UMask": "0xff", - "Unit": "L3PMC" - }, - { - "EventName": "l3_comb_clstr_state.other_l3_miss_typs", - "EventCode": "0x06", - "BriefDescription": "Other L3 Miss Request Types", - "UMask": "0xfe", - "Unit": "L3PMC" - }, - { - "EventName": "l3_comb_clstr_state.request_miss", - "EventCode": "0x06", - "BriefDescription": "L3 cache misses", - "UMask": "0x01", - "Unit": "L3PMC" - }, - { - "EventName": "xi_sys_fill_latency", - "EventCode": "0x90", - "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", - "UMask": "0x00", - "Unit": "L3PMC" - }, - { - "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", - "EventCode": "0x9a", - "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", - "UMask": "0x3f", - "Unit": "L3PMC" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json deleted file mode 100644 index 1079544eeed5..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json +++ /dev/null @@ -1,134 +0,0 @@ -[ - { - "EventName": "ex_ret_instr", - "EventCode": "0xc0", - "BriefDescription": "Retired Instructions." - }, - { - "EventName": "ex_ret_cops", - "EventCode": "0xc1", - "BriefDescription": "Retired Uops.", - "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." - }, - { - "EventName": "ex_ret_brn", - "EventCode": "0xc2", - "BriefDescription": "Retired Branch Instructions.", - "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." - }, - { - "EventName": "ex_ret_brn_misp", - "EventCode": "0xc3", - "BriefDescription": "Retired Branch Instructions Mispredicted.", - "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." - }, - { - "EventName": "ex_ret_brn_tkn", - "EventCode": "0xc4", - "BriefDescription": "Retired Taken Branch Instructions.", - "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." - }, - { - "EventName": "ex_ret_brn_tkn_misp", - "EventCode": "0xc5", - "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", - "PublicDescription": "The number of retired taken branch instructions that were mispredicted." - }, - { - "EventName": "ex_ret_brn_far", - "EventCode": "0xc6", - "BriefDescription": "Retired Far Control Transfers.", - "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." - }, - { - "EventName": "ex_ret_brn_resync", - "EventCode": "0xc7", - "BriefDescription": "Retired Branch Resyncs.", - "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." - }, - { - "EventName": "ex_ret_near_ret", - "EventCode": "0xc8", - "BriefDescription": "Retired Near Returns.", - "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." - }, - { - "EventName": "ex_ret_near_ret_mispred", - "EventCode": "0xc9", - "BriefDescription": "Retired Near Returns Mispredicted.", - "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." - }, - { - "EventName": "ex_ret_brn_ind_misp", - "EventCode": "0xca", - "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", - "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." - }, - { - "EventName": "ex_ret_mmx_fp_instr.sse_instr", - "EventCode": "0xcb", - "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" - }, - { - "EventName": "ex_ret_mmx_fp_instr.mmx_instr", - "EventCode": "0xcb", - "BriefDescription": "MMX instructions.", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" - }, - { - "EventName": "ex_ret_mmx_fp_instr.x87_instr", - "EventCode": "0xcb", - "BriefDescription": "x87 instructions.", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" - }, - { - "EventName": "ex_ret_cond", - "EventCode": "0xd1", - "BriefDescription": "Retired Conditional Branch Instructions." - }, - { - "EventName": "ex_ret_cond_misp", - "EventCode": "0xd2", - "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." - }, - { - "EventName": "ex_div_busy", - "EventCode": "0xd3", - "BriefDescription": "Div Cycles Busy count." - }, - { - "EventName": "ex_div_count", - "EventCode": "0xd4", - "BriefDescription": "Div Op Count." - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", - "EventCode": "0x1cf", - "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", - "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS that retired.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", - "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" - }, - { - "EventName": "ex_ret_fus_brnch_inst", - "EventCode": "0x1d0", - "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json deleted file mode 100644 index ea4711983d1d..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json +++ /dev/null @@ -1,168 +0,0 @@ -[ - { - "EventName": "fpu_pipe_assignment.dual", - "EventCode": "0x00", - "BriefDescription": "Total number multi-pipe uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", - "UMask": "0xf0" - }, - { - "EventName": "fpu_pipe_assignment.total", - "EventCode": "0x00", - "BriefDescription": "Total number uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", - "UMask": "0xf" - }, - { - "EventName": "fp_sched_empty", - "EventCode": "0x01", - "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." - }, - { - "EventName": "fp_retx87_fp_ops.all", - "EventCode": "0x02", - "BriefDescription": "All Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", - "UMask": "0x7" - }, - { - "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", - "EventCode": "0x02", - "BriefDescription": "Divide and square root Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", - "UMask": "0x4" - }, - { - "EventName": "fp_retx87_fp_ops.mul_ops", - "EventCode": "0x02", - "BriefDescription": "Multiply Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", - "UMask": "0x2" - }, - { - "EventName": "fp_retx87_fp_ops.add_sub_ops", - "EventCode": "0x02", - "BriefDescription": "Add/subtract Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", - "UMask": "0x1" - }, - { - "EventName": "fp_ret_sse_avx_ops.all", - "EventCode": "0x03", - "BriefDescription": "All FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0xff" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x80" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_div_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision divide/square root FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", - "UMask": "0x40" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision multiply FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", - "UMask": "0x20" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision add/subtract FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", - "UMask": "0x10" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", - "EventCode": "0x03", - "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x8" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_div_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision divide/square root FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", - "UMask": "0x4" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision multiply FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", - "UMask": "0x2" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision add/subtract FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", - "UMask": "0x1" - }, - { - "EventName": "fp_num_mov_elim_scal_op.optimized", - "EventCode": "0x04", - "BriefDescription": "Number of Scalar Ops optimized.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", - "UMask": "0x8" - }, - { - "EventName": "fp_num_mov_elim_scal_op.opt_potential", - "EventCode": "0x04", - "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "UMask": "0x4" - }, - { - "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", - "EventCode": "0x04", - "BriefDescription": "Number of SSE Move Ops eliminated.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", - "UMask": "0x2" - }, - { - "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", - "EventCode": "0x04", - "BriefDescription": "Number of SSE Move Ops.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", - "UMask": "0x1" - }, - { - "EventName": "fp_retired_ser_ops.x87_ctrl_ret", - "EventCode": "0x05", - "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "UMask": "0x8" - }, - { - "EventName": "fp_retired_ser_ops.x87_bot_ret", - "EventCode": "0x05", - "BriefDescription": "x87 bottom-executing uOps retired.", - "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", - "UMask": "0x4" - }, - { - "EventName": "fp_retired_ser_ops.sse_ctrl_ret", - "EventCode": "0x05", - "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x2" - }, - { - "EventName": "fp_retired_ser_ops.sse_bot_ret", - "EventCode": "0x05", - "BriefDescription": "SSE bottom-executing uOps retired.", - "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", - "UMask": "0x1" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json deleted file mode 100644 index fa2d60d4def0..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json +++ /dev/null @@ -1,162 +0,0 @@ -[ - { - "EventName": "ls_locks.bus_lock", - "EventCode": "0x25", - "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "UMask": "0x1" - }, - { - "EventName": "ls_dispatch.ld_st_dispatch", - "EventCode": "0x29", - "BriefDescription": "Load-op-Stores.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", - "UMask": "0x4" - }, - { - "EventName": "ls_dispatch.store_dispatch", - "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" - }, - { - "EventName": "ls_dispatch.ld_dispatch", - "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" - }, - { - "EventName": "ls_stlf", - "EventCode": "0x35", - "BriefDescription": "Number of STLF hits." - }, - { - "EventName": "ls_dc_accesses", - "EventCode": "0x40", - "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." - }, - { - "EventName": "ls_l1_d_tlb_miss.all", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", - "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", - "UMask": "0xff" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 1G size.", - "PublicDescription": "L1 DTLB Miss of a page of 1G size.", - "UMask": "0x80" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 2M size.", - "PublicDescription": "L1 DTLB Miss of a page of 2M size.", - "UMask": "0x40" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 32K size.", - "PublicDescription": "L1 DTLB Miss of a page of 32K size.", - "UMask": "0x20" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 4K size.", - "PublicDescription": "L1 DTLB Miss of a page of 4K size.", - "UMask": "0x10" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "PublicDescription": "L1 DTLB Reload of a page of 1G size.", - "UMask": "0x8" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "PublicDescription": "L1 DTLB Reload of a page of 2M size.", - "UMask": "0x4" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "PublicDescription": "L1 DTLB Reload of a page of 32K size.", - "UMask": "0x2" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "PublicDescription": "L1 DTLB Reload of a page of 4K size.", - "UMask": "0x1" - }, - { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", - "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", - "UMask": "0xc" - }, - { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", - "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", - "UMask": "0x3" - }, - { - "EventName": "ls_misal_accesses", - "EventCode": "0x47", - "BriefDescription": "Misaligned loads." - }, - { - "EventName": "ls_pref_instr_disp.prefetch_nta", - "EventCode": "0x4b", - "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "UMask": "0x4" - }, - { - "EventName": "ls_pref_instr_disp.store_prefetch_w", - "EventCode": "0x4b", - "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "UMask": "0x2" - }, - { - "EventName": "ls_pref_instr_disp.load_prefetch_w", - "EventCode": "0x4b", - "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", - "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", - "UMask": "0x1" - }, - { - "EventName": "ls_inef_sw_pref.mab_mch_cnt", - "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "UMask": "0x2" - }, - { - "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", - "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "UMask": "0x1" - }, - { - "EventName": "ls_not_halted_cyc", - "EventCode": "0x76", - "BriefDescription": "Cycles not in Halt." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json deleted file mode 100644 index b26a00d05a2e..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json +++ /dev/null @@ -1,65 +0,0 @@ -[ - { - "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", - "EventCode": "0x28a", - "BriefDescription": "OC to IC mode switch.", - "PublicDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" - }, - { - "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", - "EventCode": "0x28a", - "BriefDescription": "IC to OC mode switch.", - "PublicDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", - "EventCode": "0xaf", - "BriefDescription": "RETIRE Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", - "UMask": "0x40" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", - "EventCode": "0xaf", - "BriefDescription": "AGSQ Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", - "UMask": "0x20" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALU tokens total unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", - "UMask": "0x10" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", - "EventCode": "0xaf", - "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "UMask": "0x8" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 3 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", - "UMask": "0x4" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 2 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 1 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json new file mode 100644 index 000000000000..6221a840fcea --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -0,0 +1,329 @@ +[ + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response.", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + }, + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.other_requests", + "EventCode": "0x60", + "BriefDescription": "Events covered by l2_request_g2.", + "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "All Group 1 commands not in unit0.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "RdSized, RdSized32, RdSized64.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "BriefDescription": "LS to L2 WCB write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "PublicDescription": "LS to L2 WCB cache line zeroing requests.", + "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "LS ReadBlock C/S Hit.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "LS Read Block L Hit X.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkL Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "LS Read Block C S L X Change to X Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Exclusive Stale.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "IC Fill Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9a", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json new file mode 100644 index 000000000000..1079544eeed5 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -0,0 +1,134 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS that retired.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json new file mode 100644 index 000000000000..ea4711983d1d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -0,0 +1,168 @@ +[ + { + "EventName": "fpu_pipe_assignment.dual", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "UMask": "0xf0" + }, + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "UMask": "0xf" + }, + { + "EventName": "fp_sched_empty", + "EventCode": "0x01", + "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." + }, + { + "EventName": "fp_retx87_fp_ops.all", + "EventCode": "0x02", + "BriefDescription": "All Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", + "UMask": "0x7" + }, + { + "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", + "EventCode": "0x02", + "BriefDescription": "Divide and square root Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", + "UMask": "0x4" + }, + { + "EventName": "fp_retx87_fp_ops.mul_ops", + "EventCode": "0x02", + "BriefDescription": "Multiply Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", + "UMask": "0x2" + }, + { + "EventName": "fp_retx87_fp_ops.add_sub_ops", + "EventCode": "0x02", + "BriefDescription": "Add/subtract Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x80" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", + "UMask": "0x40" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", + "UMask": "0x20" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", + "UMask": "0x10" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json new file mode 100644 index 000000000000..fa2d60d4def0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -0,0 +1,162 @@ +[ + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "UMask": "0x1" + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Stores.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", + "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 1G size.", + "PublicDescription": "L1 DTLB Miss of a page of 1G size.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 2M size.", + "PublicDescription": "L1 DTLB Miss of a page of 2M size.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 32K size.", + "PublicDescription": "L1 DTLB Miss of a page of 32K size.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 4K size.", + "PublicDescription": "L1 DTLB Miss of a page of 4K size.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 1G size.", + "PublicDescription": "L1 DTLB Reload of a page of 1G size.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 2M size.", + "PublicDescription": "L1 DTLB Reload of a page of 2M size.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 32K size.", + "PublicDescription": "L1 DTLB Reload of a page of 32K size.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 4K size.", + "PublicDescription": "L1 DTLB Reload of a page of 4K size.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0x3" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.store_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.load_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", + "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json new file mode 100644 index 000000000000..b26a00d05a2e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -0,0 +1,65 @@ +[ + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC to IC mode switch.", + "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "IC to OC mode switch.", + "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "RETIRE Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "AGSQ Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALU tokens total unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 3 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 2 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 1 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 745ced083844..82a9db00125e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -36,4 +36,4 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core -AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core +AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core -- cgit v1.2.3 From 2079f7aa0a49d3ae83a82f70785a28b07bb9b16b Mon Sep 17 00:00:00 2001 From: Vijay Thakkar Date: Wed, 18 Mar 2020 15:00:01 -0400 Subject: perf vendor events amd: Add Zen2 events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds PMU events for AMD Zen2 core based processors, namely, Matisse (model 71h), Castle Peak (model 31h) and Rome (model 2xh), as documented in the AMD Processor Programming Reference for Matisse [1]. The model number regex has been set to detect all the models under family 17 that do not match those of Zen1, as the range is larger for zen2. Zen2 adds some additional counters that are not present in Zen1 and events for them have been added in this patch. Some counters have also been removed for Zen2 thatwere previously present in Zen1 and have been confirmed to always sample zero on zen2. These added/removed counters have been omitted for brevity but can be found here: https://gist.github.com/thakkarV/5b12ca5fd7488eb2c42e451e40bdd5f3 Note that PPR for Zen2 [1] does not include some counters that were documented in the PPR for Zen1 based processors [2]. After having tested these counters, some of them that still work for zen2 systems have been preserved in the events for zen2. The counters that are omitted in [1] but are still measurable and non-zero on zen2 (tested on a Ryzen 3900X system) are the following: PMC 0x000 fpu_pipe_assignment.{total|total0|total1|total2|total3} PMC 0x004 fp_num_mov_elim_scal_op.* PMC 0x046 ls_tablewalker.* PMC 0x062 l2_latency.l2_cycles_waiting_on_fills PMC 0x063 l2_wcb_req.* PMC 0x06D l2_fill_pending.l2_fill_busy PMC 0x080 ic_fw32 PMC 0x081 ic_fw32_miss PMC 0x086 bp_snp_re_sync PMC 0x087 ic_fetch_stall.* PMC 0x08C ic_cache_inval.* PMC 0x099 bp_tlb_rel PMC 0x0C7 ex_ret_brn_resync PMC 0x28A ic_oc_mode_switch.* L3PMC 0x001 l3_request_g1.* L3PMC 0x006 l3_comb_clstr_state.* [1]: Processor Programming Reference (PPR) for AMD Family 17h Model 71h, Revision B0 Processors, 56176 Rev 3.06 - Jul 17, 2019 [2]: Processor Programming Reference (PPR) for AMD Family 17h Models 01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019 All of the PPRs can be found at: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Here are the results of running "fpu_pipe_assignment.total" events on my Ryzen 3900X family 17h model 71h system: Before this patch: $> perf list *fpu_pipe_assignment* List of pre-defined events (to be used in -e): After: $> perf list *fpu_pipe_assignment* floating point: fpu_pipe_assignment.total [Total number of fp uOps] fpu_pipe_assignment.total0 [Total number uOps assigned to pipe 0] fpu_pipe_assignment.total1 [Total number uOps assigned to pipe 1] fpu_pipe_assignment.total2 [Total number uOps assigned to pipe 2] fpu_pipe_assignment.total3 [Total number uOps assigned to pipe 3] Metric Groups: $> perf stat -e fpu_pipe_assignment.total sleep 1 Performance counter stats for 'sleep 1': 25,883 fpu_pipe_assignment.total 1.004145868 seconds time elapsed 0.001805000 seconds user 0.000000000 seconds sys Usage tests while running Linpackin the background: $> perf stat -I1000 -e fpu_pipe_assignment.total 1.000266796 79,313,191,516 fpu_pipe_assignment.total 2.000809630 68,091,474,430 fpu_pipe_assignment.total 3.001028115 52,925,023,174 fpu_pipe_assignment.total $> perf record -e fpu_pipe_assignment.total,fpu_pipe_assignment.total0 -a sleep 1 [ perf record: Woken up 9 times to write data ] [ perf record: Captured and wrote 4.031 MB perf.data (64764 samples) ] $> perf report --stdio --no-header | head -30 98.33% xhpl xhpl [.] dgemm_kernel 0.28% xhpl xhpl [.] dtrsm_kernel_LT 0.10% xhpl [kernel.kallsyms] [k] entry_SYSCALL_64 0.08% xhpl xhpl [.] idamax_k 0.07% baloo_file_extr liblmdb.so [.] mdb_mid2l_insert 0.06% xhpl xhpl [.] dgemm_itcopy 0.06% xhpl xhpl [.] dgemm_oncopy 0.06% xhpl [kernel.kallsyms] [k] __schedule 0.06% xhpl [kernel.kallsyms] [k] syscall_trace_enter 0.06% xhpl [kernel.kallsyms] [k] native_sched_clock 0.06% xhpl [kernel.kallsyms] [k] pick_next_task_fair 0.05% xhpl xhpl [.] blas_thread_server.llvm.15009391670273914865 0.04% xhpl [kernel.kallsyms] [k] do_syscall_64 0.04% xhpl [kernel.kallsyms] [k] yield_task_fair 0.04% xhpl libpthread-2.31.so [.] __pthread_mutex_unlock_usercnt 0.03% xhpl [kernel.kallsyms] [k] cpuacct_charge 0.03% xhpl [kernel.kallsyms] [k] syscall_return_via_sysret 0.03% xhpl libc-2.31.so [.] __sched_yield 0.03% xhpl [kernel.kallsyms] [k] __calc_delta $> perf annotate --stdio2 dgemm_kernel | egrep '^ {0,2}[0-9]+' -B2 -A2 sub $0x60,%rsp mov %rbx,(%rsp) 0.00 mov %rbp,0x8(%rsp) mov %r12,0x10(%rsp) 0.00 mov %r13,0x18(%rsp) mov %r14,0x20(%rsp) mov %r15,0x28(%rsp) -- mov %rdi,%r13 mov %rsi,0x28(%rsp) 0.00 mov %rdx,%r12 vmovsd %xmm0,0x30(%rsp) shl $0x3,%r10 mov 0x28(%rsp),%rax 0.00 xor %rdx,%rdx mov $0x18,%rdi div %rdi -- nop a0: mov %r12,%rax 0.00 shl $0x3,%rax mov %r8,%rdi lea (%r8,%rax,8),%r15 -- mov %r12,%rax nop 0.00 c0: vmovups (%rdi),%ymm1 0.09 vmovups 0x20(%rdi),%ymm2 0.02 vmovups (%r15),%ymm3 0.10 vmovups %ymm1,(%rsi) 0.07 vmovups %ymm2,0x20(%rsi) 0.07 vmovups %ymm3,0x40(%rsi) 0.06 add $0x40,%rdi add $0x40,%r15 add $0x60,%rsi 0.00 dec %rax ↑ jne c0 mov %r9,%r15 -- nop 110: lea 0x80(%rsp),%rsi 0.01 add $0x60,%rsi 0.03 mov %r12,%rax 0.00 sar $0x3,%rax cmp $0x2,%rax ↓ jl d26 prefetcht0 0x200(%rdi) 0.01 vmovups -0x60(%rsi),%ymm1 0.02 prefetcht0 0xa0(%rsi) 0.00 vbroadcastsd -0x80(%rdi),%ymm0 0.00 prefetcht0 0xe0(%rsi) 0.03 vmovups -0x40(%rsi),%ymm2 0.00 prefetcht0 0x120(%rsi) vmovups -0x20(%rsi),%ymm3 vmulpd %ymm0,%ymm1,%ymm4 0.01 prefetcht0 0x160(%rsi) vmulpd %ymm0,%ymm2,%ymm8 0.01 vmulpd %ymm0,%ymm3,%ymm12 0.02 prefetcht0 0x1a0(%rsi) 0.01 vbroadcastsd -0x78(%rdi),%ymm0 vmulpd %ymm0,%ymm1,%ymm5 0.01 vmulpd %ymm0,%ymm2,%ymm9 vmulpd %ymm0,%ymm3,%ymm13 0.01 vbroadcastsd -0x70(%rdi),%ymm0 vmulpd %ymm0,%ymm1,%ymm6 0.00 vmulpd %ymm0,%ymm2,%ymm10 0.00 add $0x60,%rsi ... snip ... nop 65e0: vmovddup -0x60(%rsi),%xmm2 0.00 vmovups -0x80(%rdi),%xmm0 vmovups -0x70(%rdi),%xmm1 0.00 vmovddup -0x58(%rsi),%xmm3 vfmadd231pd %xmm0,%xmm2,%xmm4 0.00 vfmadd231pd %xmm1,%xmm2,%xmm5 0.00 vfmadd231pd %xmm0,%xmm3,%xmm6 0.00 vfmadd231pd %xmm1,%xmm3,%xmm7 0.00 add $0x10,%rsi add $0x20,%rdi 0.00 dec %rax ↑ jne 65e0 nop nop 6620: vmovddup 0x30(%rsp),%xmm0 0.00 vmulpd %xmm0,%xmm4,%xmm4 0.00 vmulpd %xmm0,%xmm5,%xmm5 vmulpd %xmm0,%xmm6,%xmm6 vmulpd %xmm0,%xmm7,%xmm7 vaddpd (%r15),%xmm4,%xmm4 vaddpd 0x10(%r15),%xmm5,%xmm5 0.00 vaddpd (%r15,%r10,1),%xmm6,%xmm6 0.00 vaddpd 0x10(%r15,%r10,1),%xmm7,%xmm7 0.00 vmovups %xmm4,(%r15) vmovups %xmm5,0x10(%r15) 0.00 vmovups %xmm6,(%r15,%r10,1) vmovups %xmm7,0x10(%r15,%r10,1) add $0x20,%r15 -- lea (%r8,%rax,8),%r8 69d8: mov 0x20(%rsp),%r14 0.00 test $0x1,%r14 ↓ je 6d84 mov %r9,%r15 -- vbroadcastsd -0x28(%rsi),%ymm3 vfmadd231pd (%rdi),%ymm0,%ymm4 0.00 vfmadd231pd 0x20(%rdi),%ymm1,%ymm5 vfmadd231pd 0x40(%rdi),%ymm2,%ymm6 vfmadd231pd 0x60(%rdi),%ymm3,%ymm7 -- vmulpd %ymm0,%ymm4,%ymm4 vaddpd (%r15),%ymm4,%ymm4 0.00 vmovups %ymm4,(%r15) add $0x20,%r15 dec %r11 -- mov %rbx,%rsp mov (%rsp),%rbx 0.01 mov 0x8(%rsp),%rbp mov 0x10(%rsp),%r12 mov 0x18(%rsp),%r13 Signed-off-by: Vijay Thakkar Tested-by: Arnaldo Carvalho de Melo Acked-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Jon Grimm Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200318190002.307290-3-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen2/branch.json | 52 ++++ tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 338 ++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen2/core.json | 130 ++++++++ .../arch/x86/amdzen2/floating-point.json | 140 +++++++++ tools/perf/pmu-events/arch/x86/amdzen2/memory.json | 341 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen2/other.json | 115 +++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 7 files changed, 1117 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/other.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json new file mode 100644 index 000000000000..ef4166a66288 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json @@ -0,0 +1,52 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", + "UMask": "0xFF" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if1g", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", + "UMask": "0x4" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if2m", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", + "UMask": "0x2" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if4k", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json new file mode 100644 index 000000000000..1c60bfa0f00b --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -0,0 +1,338 @@ +[ + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2_cmd", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.group2", + "EventCode": "0x60", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", + "UMask": "0x4" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", + "UMask": "0x2" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", + "UMask": "0x1" + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x1" + }, + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9A", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json new file mode 100644 index 000000000000..de89e5a44ff1 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -0,0 +1,130 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.", + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json new file mode 100644 index 000000000000..622a0c420e46 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json @@ -0,0 +1,140 @@ +[ + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps.", + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", + "UMask": "0xf" + }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x8" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x4" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x2" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.mac_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "PublicDescription": "", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.div_flops", + "EventCode": "0x03", + "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.mult_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", + "UMask": "0x1" + }, + { + "EventName": "fp_disp_faults.ymm_spill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", + "UMask": "0x8" + }, + { + "EventName": "fp_disp_faults.ymm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", + "UMask": "0x4" + }, + { + "EventName": "fp_disp_faults.xmm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", + "UMask": "0x2" + }, + { + "EventName": "fp_disp_faults.x87_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json new file mode 100644 index 000000000000..715046b339cb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json @@ -0,0 +1,341 @@ +[ + { + "EventName": "ls_bad_status2.stli_other", + "EventCode": "0x24", + "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", + "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_locks.spec_lock_hi_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", + "UMask": "0x8" + }, + { + "EventName": "ls_locks.spec_lock_lo_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", + "UMask": "0x4" + }, + { + "EventName": "ls_locks.non_spec_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", + "UMask": "0x2" + }, + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.", + "UMask": "0x1" + }, + { + "EventName": "ls_ret_cl_flush", + "EventCode": "0x26", + "BriefDescription": "Number of retired CLFLUSH instructions." + }, + { + "EventName": "ls_ret_cpuid", + "EventCode": "0x27", + "BriefDescription": "Number of retired CPUID instructions." + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_smi_rx", + "EventCode": "0x2B", + "BriefDescription": "Number of SMIs received." + }, + { + "EventName": "ls_int_taken", + "EventCode": "0x2C", + "BriefDescription": "Number of interrupts taken." + }, + { + "EventName": "ls_rdtsc", + "EventCode": "0x2D", + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", + "EventCode": "0x37", + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "Number of accesses to the dcache for load/store references.", + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", + "UMask": "0x8" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Loads.", + "UMask": "0x1" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.", + "UMask": "0x40" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.", + "UMask": "0x10" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", + "UMask": "0x8" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", + "UMask": "0x2" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "All L1 DTLB Misses or Reloads.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that miss in the L2 TLB.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that miss in the L2 TLB.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that miss the L2 TLB.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.iside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on I-side.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x8" + }, + { + "EventName": "ls_tablewalker.ic_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x4" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", + "UMask": "0x3" + }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x2" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x1" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", + "UMask": "0xff" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.prefetch", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", + "UMask": "0x40" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", + "UMask": "0x10" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).", + "UMask": "0x8" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).", + "UMask": "0x2" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", + "UMask": "0x40" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", + "UMask": "0x10" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", + "UMask": "0x8" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", + "UMask": "0x2" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + }, + { + "EventName": "ls_tlb_flush", + "EventCode": "0x78", + "BriefDescription": "All TLB Flushes" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json new file mode 100644 index 000000000000..e94994d4a60e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json @@ -0,0 +1,115 @@ +[ + { + "EventName": "de_dis_uop_queue_empty_di0", + "EventCode": "0xa9", + "BriefDescription": "Cycles where the Micro-Op Queue is empty." + }, + { + "EventName": "de_dis_uops_from_decoder", + "EventCode": "0xaa", + "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.", + "UMask": "0xff" + }, + { + "EventName": "de_dis_uops_from_decoder.opcache_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Count of dispatched Ops from OpCache.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_uops_from_decoder.decoder_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Count of dispatched Ops from Decoder.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.", + "UMask": "0x80" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Taken branch buffer resource stall.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 82a9db00125e..244a36e37a3a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -37,3 +37,4 @@ GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core +AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.2.3 From b5b8a7cf141acba6588d2a43cda0dd258299f902 Mon Sep 17 00:00:00 2001 From: Vijay Thakkar Date: Wed, 18 Mar 2020 15:00:02 -0400 Subject: perf vendor events amd: Update Zen1 events to V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the PMCs for AMD Zen1 core based processors (Family 17h; Models 0 through 2F) to be in accordance with PMCs as documented in the latest versions of the AMD Processor Programming Reference [1], [2] and [3]. Note that some events, such as FPU pipe assignment are missing in [1], and therefore [3] is included for full coverage of events. PMCs added: fpu_pipe_assignment.dual{0|1|2|3} fpu_pipe_assignment.total{0|1|2|3} ls_mab_alloc.dc_prefetcher ls_mab_alloc.stores ls_mab_alloc.loads bp_dyn_ind_pred bp_de_redirect PMC removed: ex_ret_cond_misp Cumulative counts, fpu_pipe_assignment.total and fpu_pipe_assignment.dual, existed in v1, but did expose port-level counters. ex_ret_cond_misp has been removed as it has been removed from the latest versions of the PPR, and when tested, always seems to sample zero as tested on a Ryzen 3400G system. [1]: Processor Programming Reference (PPR) for AMD Family 17h Models 01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019. [2]: Processor Programming Reference (PPR) for AMD Family 17h Model 18h, Revision B1 Processors, 55570-B1 Rev 3.14 - Sep 26, 2019. [3]: OSRR for AMD Family 17h processors, Models 00h-2Fh, 56255 Rev 3.03 - July, 2018 All of the PPRs can be found at: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Vijay Thakkar Acked-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Jon Grimm Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: vijay thakkar Link: http://lore.kernel.org/lkml/20200318190002.307290-4-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/branch.json | 11 +++ tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 107 +++++++-------------- tools/perf/pmu-events/arch/x86/amdzen1/core.json | 15 +-- .../arch/x86/amdzen1/floating-point.json | 64 +++++++++++- tools/perf/pmu-events/arch/x86/amdzen1/memory.json | 82 ++++++++++------ tools/perf/pmu-events/arch/x86/amdzen1/other.json | 27 ++---- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 7 files changed, 172 insertions(+), 136 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json index 93ddfd8053ca..a9943eeb8d6b 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -8,5 +8,16 @@ "EventName": "bp_l2_btb_correct", "EventCode": "0x8b", "BriefDescription": "L2 BTB Correction." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 6221a840fcea..404d4c569c01 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -37,36 +37,31 @@ { "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", "UMask": "0x4" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", "UMask": "0x2" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", "UMask": "0x1" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", "UMask": "0x2" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to overwriting fill response.", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", "UMask": "0x1" }, { @@ -77,211 +72,181 @@ { "EventName": "l2_request_g1.rd_blk_l", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", "UMask": "0x80" }, { "EventName": "l2_request_g1.rd_blk_x", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", "UMask": "0x40" }, { "EventName": "l2_request_g1.ls_rd_blk_c_s", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", "UMask": "0x20" }, { "EventName": "l2_request_g1.cacheable_ic_read", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", "UMask": "0x10" }, { "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", "UMask": "0x8" }, { - "EventName": "l2_request_g1.prefetch_l2", + "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", "UMask": "0x4" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", "UMask": "0x2" }, { - "EventName": "l2_request_g1.other_requests", + "EventName": "l2_request_g1.group2", "EventCode": "0x60", - "BriefDescription": "Events covered by l2_request_g2.", - "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", - "BriefDescription": "All Group 1 commands not in unit0.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", "UMask": "0x80" }, { "EventName": "l2_request_g2.ls_rd_sized", "EventCode": "0x61", - "BriefDescription": "RdSized, RdSized32, RdSized64.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", "UMask": "0x40" }, { "EventName": "l2_request_g2.ls_rd_sized_nc", "EventCode": "0x61", - "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", "UMask": "0x20" }, { "EventName": "l2_request_g2.ic_rd_sized", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", "UMask": "0x10" }, { "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", "UMask": "0x8" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", "UMask": "0x4" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", "UMask": "0x2" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", "UMask": "0x1" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", "UMask": "0x1" }, { "EventName": "l2_wcb_req.wcb_write", "EventCode": "0x63", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", - "BriefDescription": "LS to L2 WCB write requests.", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", "UMask": "0x40" }, { "EventName": "l2_wcb_req.wcb_close", "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB close requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", "UMask": "0x20" }, { "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB zero byte store requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", "UMask": "0x4" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", - "PublicDescription": "LS to L2 WCB cache line zeroing requests.", - "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", "UMask": "0x1" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", "EventCode": "0x64", - "BriefDescription": "LS ReadBlock C/S Hit.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", "UMask": "0x80" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", "EventCode": "0x64", - "BriefDescription": "LS Read Block L Hit X.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.", "UMask": "0x40" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", "EventCode": "0x64", - "BriefDescription": "LsRdBlkL Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", "UMask": "0x20" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_x", "EventCode": "0x64", - "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", "UMask": "0x10" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", - "BriefDescription": "LS Read Block C S L X Change to X Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", "UMask": "0x8" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Exclusive Stale.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", "UMask": "0x4" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", "UMask": "0x2" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", - "BriefDescription": "IC Fill Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", - "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 1079544eeed5..7e1aa8273935 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -62,7 +62,6 @@ "EventName": "ex_ret_brn_ind_misp", "EventCode": "0xca", "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", - "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." }, { "EventName": "ex_ret_mmx_fp_instr.sse_instr", @@ -90,11 +89,6 @@ "EventCode": "0xd1", "BriefDescription": "Retired Conditional Branch Instructions." }, - { - "EventName": "ex_ret_cond_misp", - "EventCode": "0xd2", - "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." - }, { "EventName": "ex_div_busy", "EventCode": "0xd3", @@ -108,22 +102,19 @@ { "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", - "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", "UMask": "0x4" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS that retired.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", "UMask": "0x2" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json index ea4711983d1d..a35542bd3b36 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -2,17 +2,73 @@ { "EventName": "fpu_pipe_assignment.dual", "EventCode": "0x00", - "BriefDescription": "Total number multi-pipe uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.", "UMask": "0xf0" }, + { + "EventName": "fpu_pipe_assignment.dual3", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.", + "UMask": "0x80" + }, + { + "EventName": "fpu_pipe_assignment.dual2", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.", + "UMask": "0x40" + }, + { + "EventName": "fpu_pipe_assignment.dual1", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.", + "UMask": "0x20" + }, + { + "EventName": "fpu_pipe_assignment.dual0", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.", + "UMask": "0x10" + }, { "EventName": "fpu_pipe_assignment.total", "EventCode": "0x00", - "BriefDescription": "Total number uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "BriefDescription": "Total number uOps assigned to all fpu pipes.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.", "UMask": "0xf" }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x8" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x4" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x2" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x1" + }, { "EventName": "fp_sched_empty", "EventCode": "0x01", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json index fa2d60d4def0..b33a3c308019 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -3,28 +3,24 @@ "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", "UMask": "0x1" }, { "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", - "BriefDescription": "Load-op-Stores.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", "UMask": "0x4" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", "UMask": "0x2" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", "UMask": "0x1" }, { @@ -37,83 +33,114 @@ "EventCode": "0x40", "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - DC prefetcher.", + "UMask": "0x8" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - loads.", + "UMask": "0x01" + }, { "EventName": "ls_l1_d_tlb_miss.all", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", - "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", "UMask": "0xff" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 1G size.", - "PublicDescription": "L1 DTLB Miss of a page of 1G size.", "UMask": "0x80" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 2M size.", - "PublicDescription": "L1 DTLB Miss of a page of 2M size.", "UMask": "0x40" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 32K size.", - "PublicDescription": "L1 DTLB Miss of a page of 32K size.", "UMask": "0x20" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 4K size.", - "PublicDescription": "L1 DTLB Miss of a page of 4K size.", "UMask": "0x10" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "PublicDescription": "L1 DTLB Reload of a page of 1G size.", "UMask": "0x8" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "PublicDescription": "L1 DTLB Reload of a page of 2M size.", "UMask": "0x4" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "PublicDescription": "L1 DTLB Reload of a page of 32K size.", "UMask": "0x2" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "PublicDescription": "L1 DTLB Reload of a page of 4K size.", "UMask": "0x1" }, { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventName": "ls_tablewalker.iside", "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", + "BriefDescription": "Total Page Table Walks on I-side.", "UMask": "0xc" }, { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x8" + }, + { + "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x4" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", "UMask": "0x3" }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x2" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x1" + }, { "EventName": "ls_misal_accesses", "EventCode": "0x47", @@ -123,35 +150,30 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", "UMask": "0x4" }, { "EventName": "ls_pref_instr_disp.store_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", "UMask": "0x2" }, { "EventName": "ls_pref_instr_disp.load_prefetch_w", "EventCode": "0x4b", - "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", - "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", + "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", "UMask": "0x1" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", "UMask": "0x2" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json index b26a00d05a2e..ff780098d36e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -2,64 +2,55 @@ { "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", - "BriefDescription": "OC to IC mode switch.", - "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", "UMask": "0x2" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", - "BriefDescription": "IC to OC mode switch.", - "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", "UMask": "0x1" }, { "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", "EventCode": "0xaf", - "BriefDescription": "RETIRE Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", "UMask": "0x40" }, { "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", "EventCode": "0xaf", - "BriefDescription": "AGSQ Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", "UMask": "0x20" }, { "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALU tokens total unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", "UMask": "0x10" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", - "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", "UMask": "0x8" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 3 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", "UMask": "0x4" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 2 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", "UMask": "0x2" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 1 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", "UMask": "0x1" } ] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 244a36e37a3a..25b06cf98747 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -36,5 +36,5 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core -AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core +AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.2.3 From 29f36c168813f1beaf59812cc79ef46fb33c669a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Mar 2020 11:42:56 -0300 Subject: tools headers uapi: Update linux/in.h copy To get the changes in: 267762538705 ("seg6: fix SRv6 L2 tunnels to use IANA-assigned protocol number") That ends up automatically adding the new IPPROTO_ETHERNET to the socket args beautifiers: $ tools/perf/trace/beauty/socket_ipproto.sh > before Apply this patch: $ tools/perf/trace/beauty/socket_ipproto.sh > after $ diff -u before after --- before 2020-03-19 11:48:36.876673819 -0300 +++ after 2020-03-19 11:49:00.148541377 -0300 @@ -6,6 +6,7 @@ [132] = "SCTP", [136] = "UDPLITE", [137] = "MPLS", + [143] = "ETHERNET", [17] = "UDP", [1] = "ICMP", [22] = "IDP", $ Addresses this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: David S. Miller Cc: Paolo Lungaroni Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 1521073b6348..8533bf07450f 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ -- cgit v1.2.3 From 7cd053d4cf8a3dc1a06bdb4be0ed9b0ecbaa21f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 18 Mar 2020 21:45:22 +0100 Subject: perf tools: Unify a bit the build directory output Removing the extra 'SUBDIR' line from clean and doc build output. Because it's annoying.. ;-) Before: $ make clean ... SUBDIR Documentation CLEAN Documentation After: $ make clean ... CLEAN Documentation Before: $ make doc BUILD: Doing 'make -j8' parallel build SUBDIR Documentation ASCIIDOC perf-stat.html ... After: $ make doc BUILD: Doing 'make -j8' parallel build ASCIIDOC perf-stat.html ... Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200318204522.1200981-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3eda9d4b88e7..a02aca9b21f4 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -231,6 +231,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ +DOC_DIR = $(srctree)/tools/perf/Documentation/ # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. # Without this setting the output feature dump file misses some features, for @@ -792,7 +793,6 @@ $(LIBSUBCMD): FORCE $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a $(LIBSUBCMD)-clean: - $(call QUIET_CLEAN, libsubcmd) $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean help: @@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html # 'make doc' should call 'make -C Documentation all' $(DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) TAG_FOLDERS= . ../lib ../include TAG_FILES= ../../include/uapi/linux/perf_event.h @@ -959,7 +959,7 @@ install-python_ext: # 'make install-doc' should call 'make -C Documentation install' $(INSTALL_DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ### Cleaning rules @@ -1008,7 +1008,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(rename_flags_array) \ $(OUTPUT)$(arch_errno_name_array) \ $(OUTPUT)$(sync_file_range_arrays) - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean + $(call QUIET_CLEAN, Documentation) \ + $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) -- cgit v1.2.3 From c52db67a74b3a9b834af4588489cfea22ec280a3 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:13 +0800 Subject: perf jevents: Add some test events Add some test PMU events. The events are randomly chosen from x86 and arm64 JSONs. The events include CPU and uncore events. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/test/test_cpu/branch.json | 12 ++++++++++ .../perf/pmu-events/arch/test/test_cpu/other.json | 26 ++++++++++++++++++++++ .../perf/pmu-events/arch/test/test_cpu/uncore.json | 21 +++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/branch.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/other.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/uncore.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/test/test_cpu/branch.json b/tools/perf/pmu-events/arch/test/test_cpu/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." + } +] diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_cpu/other.json new file mode 100644 index 000000000000..7d53d7ecd723 --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/other.json @@ -0,0 +1,26 @@ +[ + { + "EventCode": "0x6", + "Counter": "0,1", + "UMask": "0x80", + "EventName": "SEGMENT_REG_LOADS.ANY", + "SampleAfterValue": "200000", + "BriefDescription": "Number of segment register loads." + }, + { + "EventCode": "0x9", + "Counter": "0,1", + "UMask": "0x20", + "EventName": "DISPATCH_BLOCKED.ANY", + "SampleAfterValue": "200000", + "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason" + }, + { + "EventCode": "0x3A", + "Counter": "0,1", + "UMask": "0x0", + "EventName": "EIST_TRANS", + "SampleAfterValue": "200000", + "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json new file mode 100644 index 000000000000..d0a890cc814d --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json @@ -0,0 +1,21 @@ +[ + { + "EventCode": "0x02", + "EventName": "uncore_hisi_ddrc.flux_wcmd", + "BriefDescription": "DDRC write commands", + "PublicDescription": "DDRC write commands", + "Unit": "hisi_sccl,ddrc" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] -- cgit v1.2.3 From d84478088780acdecfcf50a0e52b71cc4ab7c520 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:14 +0800 Subject: perf jevents: Support test events folder With the goal of supporting pmu-events test case, introduce support for a test events folder. These test events can be used for testing generation of pmu-event tables and alias creation for any arch. When running the pmu-events test case, these test events will be used as the platform-agnostic events, so aliases can be created per-PMU and validated against known expected values. To support the test events, add a "testcpu" entry in pmu_events_map[]. The pmu-events test will be able to lookup the events map for "testcpu", to verify the generated tables against expected values. The resultant generated pmu-events.c will now look like the following: struct pmu_event pme_ampere_emag[] = { { .name = "ldrex_spec", .event = "event=0x6c", .desc = "Exclusive operation spe...", .topic = "intrinsic", .long_desc = "Exclusive operation ...", }, ... }; struct pmu_event pme_test_cpu[] = { { .name = "uncore_hisi_ddrc.flux_wcmd", .event = "event=0x2", .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", .topic = "uncore", .long_desc = "DDRC write commands", .pmu = "hisi_sccl,ddrc", }, { .name = "unc_cbo_xsnp_response.miss_eviction", .event = "umask=0x81,event=0x22", .desc = "Unit: uncore_cbox A cross-core snoop resulted ...", .topic = "uncore", .long_desc = "A cross-core snoop resulted from L3 ...", .pmu = "uncore_cbox", }, { .name = "eist_trans", .event = "umask=0x0,period=200000,event=0x3a", .desc = "Number of Enhanced Intel SpeedStep(R) ...", .topic = "other", }, { .name = 0, }, }; struct pmu_events_map pmu_events_map[] = { ... { .cpuid = "0x00000000500f0000", .version = "v1", .type = "core", .table = pme_ampere_emag }, ... { .cpuid = "testcpu", .version = "v1", .type = "core", .table = pme_test_cpu, }, { .cpuid = 0, .version = 0, .type = 0, .table = 0, }, }; Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 3c4236a5bad8..fa86c5f997cc 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -771,6 +771,19 @@ static void print_mapping_table_suffix(FILE *outfp) fprintf(outfp, "};\n"); } +static void print_mapping_test_table(FILE *outfp) +{ + /* + * Print the terminating, NULL entry. + */ + fprintf(outfp, "{\n"); + fprintf(outfp, "\t.cpuid = \"testcpu\",\n"); + fprintf(outfp, "\t.version = \"v1\",\n"); + fprintf(outfp, "\t.type = \"core\",\n"); + fprintf(outfp, "\t.table = pme_test_cpu,\n"); + fprintf(outfp, "},\n"); +} + static int process_mapfile(FILE *outfp, char *fpath) { int n = 16384; @@ -848,6 +861,7 @@ static int process_mapfile(FILE *outfp, char *fpath) } out: + print_mapping_test_table(outfp); print_mapping_table_suffix(outfp); fclose(mapfp); free(line); @@ -1168,6 +1182,22 @@ int main(int argc, char *argv[]) goto empty_map; } + sprintf(ldirname, "%s/test", start_dirname); + + rc = nftw(ldirname, process_one_file, maxfds, 0); + if (rc && verbose) { + pr_info("%s: Error walking file tree %s rc=%d for test\n", + prog, ldirname, rc); + goto empty_map; + } else if (rc < 0) { + /* Make build fail */ + free_arch_std_events(); + ret = 1; + goto out_free_mapfile; + } else if (rc) { + goto empty_map; + } + if (close_table) print_events_table_suffix(eventsfp); -- cgit v1.2.3 From e45ad701e784e0eed8a07b537b47afb302c59dab Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:15 +0800 Subject: perf pmu: Refactor pmu_add_cpu_aliases() Create pmu_add_cpu_aliases_map() from pmu_add_cpu_aliases(), so the caller can pass the map; the pmu-events test would use this since there would be no CPUID matching to a mapfile there. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 21 +++++++++++++-------- tools/perf/util/pmu.h | 3 +++ 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b99fd312aae..c616a06a34a8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -21,7 +21,6 @@ #include "pmu.h" #include "parse-events.h" #include "header.h" -#include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" #include "fncache.h" @@ -744,16 +743,11 @@ out: * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map) { int i; - struct pmu_events_map *map; const char *name = pmu->name; - - map = perf_pmu__find_map(pmu); - if (!map) - return; - /* * Found a matching PMU events table. Create aliases */ @@ -788,6 +782,17 @@ new_alias: } } +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + struct pmu_events_map *map; + + map = perf_pmu__find_map(pmu); + if (!map) + return; + + pmu_add_cpu_aliases_map(head, pmu, map); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6737e3d5d568..0b4a0efae38e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -7,6 +7,7 @@ #include #include #include "parse-events.h" +#include "pmu-events/pmu-events.h" struct perf_evsel_config_term; @@ -97,6 +98,8 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -- cgit v1.2.3 From a6c925fd3aa2aba043a73b0ad57ffd296e1c42c9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:16 +0800 Subject: perf test: Add pmu-events test The initial test will verify that the test tables in generated pmu-events.c match against known, expected values. For known events added in pmu-events/arch/test, we need to add an entry in test_cpu_aliases_events[] or test_uncore_events[]. A sample run is as follows for x86: john@linux-3c19:~/linux> tools/perf/perf test -vv 10 10: PMU event aliases : --- start --- test child forked, pid 5316 testing event table bp_l1_btb_correct: pass testing event table bp_l2_btb_correct: pass testing event table segment_reg_loads.any: pass testing event table dispatch_blocked.any: pass testing event table eist_trans: pass testing event table uncore_hisi_ddrc.flux_wcmd: pass testing event table unc_cbo_xsnp_response.miss_eviction: pass test child finished with 0 ---- end ---- PMU event aliases: Ok Signed-off-by: John Garry Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com [ Fixup test_cpu_events[] and test_uncore_events[] sentinels to initialize one of its members to NULL, fixing the build in older compilers ] Link: http://lore.kernel.org/lkml/1584442939-8911-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/pmu-events.c | 233 ++++++++++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 239 insertions(+) create mode 100644 tools/perf/tests/pmu-events.c (limited to 'tools') diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1692529639b0..b3d1bf13ca07 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -14,6 +14,7 @@ perf-y += evsel-roundtrip-name.o perf-y += evsel-tp-sched.o perf-y += fdarray.o perf-y += pmu.o +perf-y += pmu-events.o perf-y += hists_common.o perf-y += hists_link.o perf-y += hists_filter.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 54d9516c9839..b6322eb0f423 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -72,6 +72,10 @@ static struct test generic_tests[] = { .desc = "Parse perf pmu format", .func = test__pmu, }, + { + .desc = "PMU events", + .func = test__pmu_events, + }, { .desc = "DSO data read", .func = test__dso_data, diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c new file mode 100644 index 000000000000..12fc29746a09 --- /dev/null +++ b/tools/perf/tests/pmu-events.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "parse-events.h" +#include "pmu.h" +#include "tests.h" +#include +#include +#include + +#include "debug.h" +#include "../pmu-events/pmu-events.h" + +struct perf_pmu_test_event { + struct pmu_event event; +}; +static struct perf_pmu_test_event test_cpu_events[] = { + { + .event = { + .name = "bp_l1_btb_correct", + .event = "event=0x8a", + .desc = "L1 BTB Correction", + .topic = "branch", + }, + }, + { + .event = { + .name = "bp_l2_btb_correct", + .event = "event=0x8b", + .desc = "L2 BTB Correction", + .topic = "branch", + }, + }, + { + .event = { + .name = "segment_reg_loads.any", + .event = "umask=0x80,period=200000,event=0x6", + .desc = "Number of segment register loads", + .topic = "other", + }, + }, + { + .event = { + .name = "dispatch_blocked.any", + .event = "umask=0x20,period=200000,event=0x9", + .desc = "Memory cluster signals to block micro-op dispatch for any reason", + .topic = "other", + }, + }, + { + .event = { + .name = "eist_trans", + .event = "umask=0x0,period=200000,event=0x3a", + .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", + .topic = "other", + }, + }, + { /* sentinel */ + .event = { + .name = NULL, + }, + }, +}; + +static struct perf_pmu_test_event test_uncore_events[] = { + { + .event = { + .name = "uncore_hisi_ddrc.flux_wcmd", + .event = "event=0x2", + .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", + .topic = "uncore", + .long_desc = "DDRC write commands", + .pmu = "hisi_sccl,ddrc", + }, + }, + { + .event = { + .name = "unc_cbo_xsnp_response.miss_eviction", + .event = "umask=0x81,event=0x22", + .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core", + .topic = "uncore", + .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", + .pmu = "uncore_cbox", + }, + }, + { /* sentinel */ + .event = { + .name = NULL, + }, + } +}; + +const int total_test_events_size = ARRAY_SIZE(test_uncore_events); + +static bool is_same(const char *reference, const char *test) +{ + if (!reference && !test) + return true; + + if (reference && !test) + return false; + + if (!reference && test) + return false; + + return !strcmp(reference, test); +} + +static struct pmu_events_map *__test_pmu_get_events_map(void) +{ + struct pmu_events_map *map; + + for (map = &pmu_events_map[0]; map->cpuid; map++) { + if (!strcmp(map->cpuid, "testcpu")) + return map; + } + + pr_err("could not find test events map\n"); + + return NULL; +} + +/* Verify generated events from pmu-events.c is as expected */ +static int __test_pmu_event_table(void) +{ + struct pmu_events_map *map = __test_pmu_get_events_map(); + struct pmu_event *table; + int map_events = 0, expected_events; + + /* ignore 2x sentinels */ + expected_events = ARRAY_SIZE(test_cpu_events) + + ARRAY_SIZE(test_uncore_events) - 2; + + if (!map) + return -1; + + for (table = map->table; table->name; table++) { + struct perf_pmu_test_event *test; + struct pmu_event *te; + bool found = false; + + if (table->pmu) + test = &test_uncore_events[0]; + else + test = &test_cpu_events[0]; + + te = &test->event; + + for (; te->name; test++, te = &test->event) { + if (strcmp(table->name, te->name)) + continue; + found = true; + map_events++; + + if (!is_same(table->desc, te->desc)) { + pr_debug2("testing event table %s: mismatched desc, %s vs %s\n", + table->name, table->desc, te->desc); + return -1; + } + + if (!is_same(table->topic, te->topic)) { + pr_debug2("testing event table %s: mismatched topic, %s vs %s\n", + table->name, table->topic, + te->topic); + return -1; + } + + if (!is_same(table->long_desc, te->long_desc)) { + pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n", + table->name, table->long_desc, + te->long_desc); + return -1; + } + + if (!is_same(table->unit, te->unit)) { + pr_debug2("testing event table %s: mismatched unit, %s vs %s\n", + table->name, table->unit, + te->unit); + return -1; + } + + if (!is_same(table->perpkg, te->perpkg)) { + pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n", + table->name, table->perpkg, + te->perpkg); + return -1; + } + + if (!is_same(table->metric_expr, te->metric_expr)) { + pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n", + table->name, table->metric_expr, + te->metric_expr); + return -1; + } + + if (!is_same(table->metric_name, te->metric_name)) { + pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n", + table->name, table->metric_name, + te->metric_name); + return -1; + } + + if (!is_same(table->deprecated, te->deprecated)) { + pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n", + table->name, table->deprecated, + te->deprecated); + return -1; + } + + pr_debug("testing event table %s: pass\n", table->name); + } + + if (!found) { + pr_err("testing event table: could not find event %s\n", + table->name); + return -1; + } + } + + if (map_events != expected_events) { + pr_err("testing event table: found %d, but expected %d\n", + map_events, expected_events); + return -1; + } + + return 0; +} +int test__pmu_events(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + if (__test_pmu_event_table()) + return -1; + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 9a160fef47c9..61a1ab032080 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -49,6 +49,7 @@ int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest); int test__perf_evsel__tp_sched_test(struct test *test, int subtest); int test__syscall_openat_tp_fields(struct test *test, int subtest); int test__pmu(struct test *test, int subtest); +int test__pmu_events(struct test *test, int subtest); int test__attr(struct test *test, int subtest); int test__dso_data(struct test *test, int subtest); int test__dso_data_cache(struct test *test, int subtest); -- cgit v1.2.3 From d504fae93dd61b734aefc403c7653d958aef655a Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:17 +0800 Subject: perf pmu: Add is_pmu_core() Add a function to decide whether a PMU is a core PMU. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 5 +++++ tools/perf/util/pmu.h | 1 + 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c616a06a34a8..55129d09f19d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1400,6 +1400,11 @@ static void wordwrap(char *s, int start, int max, int corr) } } +bool is_pmu_core(const char *name) +{ + return !strcmp(name, "cpu") || is_arm_pmu_core(name); +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 0b4a0efae38e..b756946ae78d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -88,6 +88,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); +bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, bool deprecated); -- cgit v1.2.3 From 5b9a50001b2c23f90f2a21db4763f3a9599588c4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:18 +0800 Subject: perf pmu: Make pmu_uncore_alias_match() public The perf pmu-events test will want to use pmu_uncore_alias_match(), so make it public. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 2 +- tools/perf/util/pmu.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 55129d09f19d..616fbda7c3fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -698,7 +698,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; bool res; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b756946ae78d..5fb3f16828df 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -103,6 +103,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); -- cgit v1.2.3 From 956a78356c24c1ac3becf854135dc065b6b74265 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:19 +0800 Subject: perf test: Test pmu-events aliases Add creating event aliases to the pmu-events test. So currently we verify that the generated pmu-events.c is as expected for some test events. Now test that we generate aliases as expected for those events during normal operation. For that, we cycle through each HW PMU in the system, and use the test events to create aliases, and verify those against known, expected values. For core PMUs, we should create an alias for every event in test_cpu_events[]. However, for uncore PMUs, they need to be matched by the pmu_event.pmu member, so use test_uncore_events[]; so check the match beforehand with pmu_uncore_alias_match(). A sample run is as follows for my x86 machine: john@linux-3c19:~/linux> tools/perf/perf test -vv 10 10: PMU events : --- start --- ... testing PMU uncore_arb aliases: no events to match testing PMU cstate_pkg aliases: no events to match skipping testing PMU breakpoint testing aliases PMU uncore_cbox_1: matched event unc_cbo_xsnp_response.miss_eviction testing PMU uncore_cbox_1 aliases: pass testing PMU power aliases: no events to match testing aliases PMU cpu: matched event bp_l1_btb_correct testing aliases PMU cpu: matched event bp_l2_btb_correct testing aliases PMU cpu: matched event segment_reg_loads.any testing aliases PMU cpu: matched event dispatch_blocked.any testing aliases PMU cpu: matched event eist_trans testing PMU cpu aliases: pass testing PMU intel_pt aliases: no events to match skipping testing PMU software skipping testing PMU intel_bts testing PMU uncore_imc aliases: no events to match testing aliases PMU uncore_cbox_0: matched event unc_cbo_xsnp_response.miss_eviction testing PMU uncore_cbox_0 aliases: pass testing PMU cstate_core aliases: no events to match skipping testing PMU tracepoint testing PMU msr aliases: no events to match test child finished with 0 Signed-off-by: John Garry Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-8-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 148 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 12fc29746a09..d64261da8bf7 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -5,13 +5,24 @@ #include #include #include - +#include #include "debug.h" #include "../pmu-events/pmu-events.h" struct perf_pmu_test_event { struct pmu_event event; + + /* extra events for aliases */ + const char *alias_str; + + /* + * Note: For when PublicDescription does not exist in the JSON, we + * will have no long_desc in pmu_event.long_desc, but long_desc may + * be set in the alias. + */ + const char *alias_long_desc; }; + static struct perf_pmu_test_event test_cpu_events[] = { { .event = { @@ -20,6 +31,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "L1 BTB Correction", .topic = "branch", }, + .alias_str = "event=0x8a", + .alias_long_desc = "L1 BTB Correction", }, { .event = { @@ -28,6 +41,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "L2 BTB Correction", .topic = "branch", }, + .alias_str = "event=0x8b", + .alias_long_desc = "L2 BTB Correction", }, { .event = { @@ -36,6 +51,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Number of segment register loads", .topic = "other", }, + .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", + .alias_long_desc = "Number of segment register loads", }, { .event = { @@ -44,6 +61,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Memory cluster signals to block micro-op dispatch for any reason", .topic = "other", }, + .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", + .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", }, { .event = { @@ -52,6 +71,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .topic = "other", }, + .alias_str = "umask=0,(null)=0x30d40,event=0x3a", + .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }, { /* sentinel */ .event = { @@ -70,6 +91,8 @@ static struct perf_pmu_test_event test_uncore_events[] = { .long_desc = "DDRC write commands", .pmu = "hisi_sccl,ddrc", }, + .alias_str = "event=0x2", + .alias_long_desc = "DDRC write commands", }, { .event = { @@ -80,6 +103,8 @@ static struct perf_pmu_test_event test_uncore_events[] = { .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", .pmu = "uncore_cbox", }, + .alias_str = "umask=0x81,event=0x22", + .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", }, { /* sentinel */ .event = { @@ -223,11 +248,132 @@ static int __test_pmu_event_table(void) return 0; } + +static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases) +{ + struct perf_pmu_alias *alias; + + list_for_each_entry(alias, aliases, list) + if (!strcmp(test_event, alias->name)) + return alias; + + return NULL; +} + +/* Verify aliases are as expected */ +static int __test__pmu_event_aliases(char *pmu_name, int *count) +{ + struct perf_pmu_test_event *test; + struct pmu_event *te; + struct perf_pmu *pmu; + LIST_HEAD(aliases); + int res = 0; + bool use_uncore_table; + struct pmu_events_map *map = __test_pmu_get_events_map(); + + if (!map) + return -1; + + if (is_pmu_core(pmu_name)) { + test = &test_cpu_events[0]; + use_uncore_table = false; + } else { + test = &test_uncore_events[0]; + use_uncore_table = true; + } + + pmu = zalloc(sizeof(*pmu)); + if (!pmu) + return -1; + + pmu->name = pmu_name; + + pmu_add_cpu_aliases_map(&aliases, pmu, map); + + for (te = &test->event; te->name; test++, te = &test->event) { + struct perf_pmu_alias *alias = find_alias(te->name, &aliases); + + if (!alias) { + bool uncore_match = pmu_uncore_alias_match(pmu_name, + te->pmu); + + if (use_uncore_table && !uncore_match) { + pr_debug3("testing aliases PMU %s: skip matching alias %s\n", + pmu_name, te->name); + continue; + } + + pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n", + pmu_name, te->name); + res = -1; + break; + } + + if (!is_same(alias->desc, te->desc)) { + pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n", + pmu_name, alias->desc, te->desc); + res = -1; + break; + } + + if (!is_same(alias->long_desc, test->alias_long_desc)) { + pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n", + pmu_name, alias->long_desc, + test->alias_long_desc); + res = -1; + break; + } + + if (!is_same(alias->str, test->alias_str)) { + pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n", + pmu_name, alias->str, test->alias_str); + res = -1; + break; + } + + if (!is_same(alias->topic, te->topic)) { + pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n", + pmu_name, alias->topic, te->topic); + res = -1; + break; + } + + (*count)++; + pr_debug2("testing aliases PMU %s: matched event %s\n", + pmu_name, alias->name); + } + + free(pmu); + return res; +} + int test__pmu_events(struct test *test __maybe_unused, int subtest __maybe_unused) { + struct perf_pmu *pmu = NULL; + if (__test_pmu_event_table()) return -1; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + int count = 0; + + if (list_empty(&pmu->format)) { + pr_debug2("skipping testing PMU %s\n", pmu->name); + continue; + } + + if (__test__pmu_event_aliases(pmu->name, &count)) { + pr_debug("testing PMU %s aliases: failed\n", pmu->name); + return -1; + } + + if (count == 0) + pr_debug3("testing PMU %s aliases: no events to match\n", + pmu->name); + else + pr_debug("testing PMU %s aliases: pass\n", pmu->name); + } + return 0; } -- cgit v1.2.3 From d74b181a028bb5a468f0c609553eff6a8fdf4887 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 24 Mar 2020 08:03:19 +0100 Subject: perf cpumap: Fix snprintf overflow check 'snprintf' returns the number of characters which would be generated for the given input. If the returned value is *greater than* or equal to the buffer size, it means that the output has been truncated. Fix the overflow test accordingly. Fixes: 7780c25bae59f ("perf tools: Allow ability to map cpus to nodes easily") Fixes: 92a7e1278005b ("perf cpumap: Add cpu__max_present_cpu()") Signed-off-by: Christophe JAILLET Suggested-by: David Laight Cc: Alexander Shishkin Cc: Don Zickus Cc: He Zhe Cc: Jan Stancek Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20200324070319.10901-1-christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 983b7388f22b..dc5c5e6fc502 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -317,7 +317,7 @@ static void set_max_cpu_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -328,7 +328,7 @@ static void set_max_cpu_num(void) /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -356,7 +356,7 @@ static void set_max_node_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void) return 0; n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); return -1; } @@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void) continue; n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); continue; } -- cgit v1.2.3 From 0d33b34352531ff7029c58eda2321340c0ea3f5f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 24 Mar 2020 09:54:24 +0530 Subject: perf dso: Fix dso comparison Perf gets dso details from two different sources. 1st, from builid headers in perf.data and 2nd from MMAP2 samples. Dso from buildid header does not have dso_id detail. And dso from MMAP2 samples does not have buildid information. If detail of the same dso is present at both the places, filename is common. Previously, __dsos__findnew_link_by_longname_id() used to compare only long or short names, but Commit 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") also added a dso_id comparison. Because of that, now perf is creating two different dso objects of the same file, one from buildid header (with dso_id but without buildid) and second from MMAP2 sample (with buildid but without dso_id). This is causing issues with archive, buildid-list etc subcommands. Fix this by comparing dso_id only when it's present. And incase dso is present in 'dsos' list without dso_id, inject dso_id detail as well. Before: $ sudo ./perf buildid-list -H 0000000000000000000000000000000000000000 /usr/bin/ls 0000000000000000000000000000000000000000 /usr/lib64/ld-2.30.so 0000000000000000000000000000000000000000 /usr/lib64/libc-2.30.so $ ./perf archive perf archive: no build-ids found After: $ ./perf buildid-list -H b6b1291d0cead046ed0fa5734037fa87a579adee /usr/bin/ls 641f0c90cfa15779352f12c0ec3c7a2b2b6f41e8 /usr/lib64/ld-2.30.so 675ace3ca07a0b863df01f461a7b0984c65c8b37 /usr/lib64/libc-2.30.so $ ./perf archive Now please run: $ tar xvf perf.data.tar.bz2 -C ~/.debug wherever you need to run 'perf report' on. Committer notes: Renamed is_empty_dso_id() to dso_id__empty() and inject_dso_id() to dso__inject_id() to keep namespacing consistent. Fixes: 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") Reported-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Tested-by: Naveen N. Rao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200324042424.68366-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dsos.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 591707c69c39..939471731ea6 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) return 0; } +static bool dso_id__empty(struct dso_id *id) +{ + if (!id) + return true; + + return !id->maj && !id->min && !id->ino && !id->ino_generation; +} + +static void dso__inject_id(struct dso *dso, struct dso_id *id) +{ + dso->id.maj = id->maj; + dso->id.min = id->min; + dso->id.ino = id->ino; + dso->id.ino_generation = id->ino_generation; +} + static int dso_id__cmp(struct dso_id *a, struct dso_id *b) { /* * The second is always dso->id, so zeroes if not set, assume passing * NULL for a means a zeroed id */ - if (a == NULL) + if (dso_id__empty(a) || dso_id__empty(b)) return 0; return __dso_id__cmp(a, b); @@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso = __dsos__find_id(dsos, name, id, false); + + if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id)) + dso__inject_id(dso, id); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -- cgit v1.2.3 From a64d558d8cf98424cc5eb9ae6631782cd8bf789c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Mar 2020 17:34:30 +0100 Subject: selftests: netfilter: add nfqueue test case Add a test case to check nf queue infrastructure. Could be extended in the future to also cover serialization of conntrack, uid and secctx attributes in nfqueue. For now, this checks that 'queue bypass' works, that a queue rule with no bypass option blocks traffic and that userspace receives the expected number of packets. For this we add two queues and hook all of prerouting/input/forward/output/postrouting. Packets get queued twice with a dummy base chain in between: This passes with current nf tree, but reverting commit 946c0d8e6ed4 ("netfilter: nf_queue: fix reinject verdict handling") makes this trip (it processes 30 instead of expected 20 packets). v2: update config file with queue and other options missing/needed for other tests. v3: also test with tcp, this reveals problem with commit 28f8bfd1ac94 ("netfilter: Support iif matches in POSTROUTING"), due to skb->dev pointing at another skb in the retransmit rbtree (skb->dev aliases to rbnode child). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 6 +- tools/testing/selftests/netfilter/config | 6 + tools/testing/selftests/netfilter/nf-queue.c | 352 +++++++++++++++++++++++++ tools/testing/selftests/netfilter/nft_queue.sh | 332 +++++++++++++++++++++++ 4 files changed, 695 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/netfilter/nf-queue.c create mode 100755 tools/testing/selftests/netfilter/nft_queue.sh (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 08194aa44006..9c0f758310fe 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -3,6 +3,10 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ - nft_concat_range.sh + nft_concat_range.sh \ + nft_queue.sh + +LDLIBS = -lmnl +TEST_GEN_FILES = nf-queue include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 59caa8f71cd8..4faf2ce021d9 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,8 @@ CONFIG_NET_NS=y CONFIG_NF_TABLES_INET=y +CONFIG_NFT_QUEUE=m +CONFIG_NFT_NAT=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NF_CT_NETLINK=m diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c new file mode 100644 index 000000000000..29c73bce38fa --- /dev/null +++ b/tools/testing/selftests/netfilter/nf-queue.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct options { + bool count_packets; + int verbose; + unsigned int queue_num; + unsigned int timeout; +}; + +static unsigned int queue_stats[5]; +static struct options opts; + +static void help(const char *p) +{ + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); +} + +static int parse_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + /* skip unsupported attribute in user-space */ + if (mnl_attr_type_valid(attr, NFQA_MAX) < 0) + return MNL_CB_OK; + + switch (type) { + case NFQA_MARK: + case NFQA_IFINDEX_INDEV: + case NFQA_IFINDEX_OUTDEV: + case NFQA_IFINDEX_PHYSINDEV: + case NFQA_IFINDEX_PHYSOUTDEV: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + perror("mnl_attr_validate"); + return MNL_CB_ERROR; + } + break; + case NFQA_TIMESTAMP: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_timestamp)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_HWADDR: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_hw)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_PAYLOAD: + break; + } + tb[type] = attr; + return MNL_CB_OK; +} + +static int queue_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[NFQA_MAX+1] = { 0 }; + struct nfqnl_msg_packet_hdr *ph = NULL; + uint32_t id = 0; + + (void)data; + + mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb); + if (tb[NFQA_PACKET_HDR]) { + ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]); + id = ntohl(ph->packet_id); + + if (opts.verbose > 0) + printf("packet hook=%u, hwproto 0x%x", + ntohs(ph->hw_protocol), ph->hook); + + if (ph->hook >= 5) { + fprintf(stderr, "Unknown hook %d\n", ph->hook); + return MNL_CB_ERROR; + } + + if (opts.verbose > 0) { + uint32_t skbinfo = 0; + + if (tb[NFQA_SKB_INFO]) + skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO])); + if (skbinfo & NFQA_SKB_CSUMNOTREADY) + printf(" csumnotready"); + if (skbinfo & NFQA_SKB_GSO) + printf(" gso"); + if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED) + printf(" csumnotverified"); + puts(""); + } + + if (opts.count_packets) + queue_stats[ph->hook]++; + } + + return MNL_CB_OK + id; +} + +static struct nlmsghdr * +nfq_build_cfg_request(char *buf, uint8_t command, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_cmd cmd = { + .command = command, + .pf = htons(AF_INET), + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_params params = { + .copy_range = htonl(range), + .copy_mode = mode, + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), ¶ms); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_verdict(char *buf, int id, int queue_num, int verd) +{ + struct nfqnl_msg_verdict_hdr vh = { + .verdict = htonl(verd), + .id = htonl(id), + }; + struct nlmsghdr *nlh; + struct nfgenmsg *nfg; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT; + nlh->nlmsg_flags = NLM_F_REQUEST; + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh); + + return nlh; +} + +static void print_stats(void) +{ + unsigned int last, total; + int i; + + if (!opts.count_packets) + return; + + total = 0; + last = queue_stats[0]; + + for (i = 0; i < 5; i++) { + printf("hook %d packets %08u\n", i, queue_stats[i]); + last = queue_stats[i]; + total += last; + } + + printf("%u packets total\n", total); +} + +struct mnl_socket *open_queue(void) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + unsigned int queue_num; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + struct timeval tv; + uint32_t flags; + + nl = mnl_socket_open(NETLINK_NETFILTER); + if (nl == NULL) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + queue_num = opts.queue_num; + nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); + + flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; + mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); + mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + memset(&tv, 0, sizeof(tv)); + tv.tv_sec = opts.timeout; + if (opts.timeout && setsockopt(mnl_socket_get_fd(nl), + SOL_SOCKET, SO_RCVTIMEO, + &tv, sizeof(tv))) { + perror("setsockopt(SO_RCVTIMEO)"); + exit(EXIT_FAILURE); + } + + return nl; +} + +static int mainloop(void) +{ + unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + unsigned int portid; + char *buf; + int ret; + + buf = malloc(buflen); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + nl = open_queue(); + portid = mnl_socket_get_portid(nl); + + for (;;) { + uint32_t id; + + ret = mnl_socket_recvfrom(nl, buf, buflen); + if (ret == -1) { + if (errno == ENOBUFS) + continue; + + if (errno == EAGAIN) { + errno = 0; + ret = 0; + break; + } + + perror("mnl_socket_recvfrom"); + exit(EXIT_FAILURE); + } + + ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + exit(EXIT_FAILURE); + } + + id = ret - MNL_CB_OK; + nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + + mnl_socket_close(nl); + + return ret; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "chvt:q:")) != -1) { + switch (c) { + case 'c': + opts.count_packets = true; + break; + case 'h': + help(argv[0]); + exit(0); + break; + case 'q': + opts.queue_num = atoi(optarg); + if (opts.queue_num > 0xffff) + opts.queue_num = 0; + break; + case 't': + opts.timeout = atoi(optarg); + break; + case 'v': + opts.verbose++; + break; + } + } +} + +int main(int argc, char *argv[]) +{ + int ret; + + parse_opts(argc, argv); + + ret = mainloop(); + if (opts.count_packets) + print_stats(); + + return ret; +} diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh new file mode 100755 index 000000000000..6898448b4266 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -0,0 +1,332 @@ +#!/bin/bash +# +# This tests nf_queue: +# 1. can process packets from all hooks +# 2. support running nfqueue from more than one base chain +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + rm -f "$TMPFILE0" + rm -f "$TMPFILE1" +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +TMPFILE0=$(mktemp) +TMPFILE1=$(mktemp) +trap cleanup EXIT + +ip netns add ${ns1} +ip netns add ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +load_ruleset() { + local name=$1 + local prio=$2 + +ip netns exec ${nsrouter} nft -f - < /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + return 0 +} + +test_ping_router() { + ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + return 0 +} + +test_queue_blackhole() { + local proto=$1 + +ip netns exec ${nsrouter} nft -f - < /dev/null + lret=$? + elif [ $proto = "ip6" ]; then + ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + lret=$? + else + lret=111 + fi + + # queue without bypass keyword should drop traffic if no listener exists. + if [ $lret -eq 0 ];then + echo "FAIL: $proto expected failure, got $lret" 1>&2 + exit 1 + fi + + ip netns exec ${nsrouter} nft delete table $proto blackh + if [ $? -ne 0 ] ;then + echo "FAIL: $proto: Could not delete blackh table" + exit 1 + fi + + echo "PASS: $proto: statement with no listener results in packet drop" +} + +test_queue() +{ + local expected=$1 + local last="" + + # spawn nf-queue listeners + ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & + sleep 1 + test_ping + ret=$? + if [ $ret -ne 0 ];then + echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2 + exit $ret + fi + + test_ping_router + ret=$? + if [ $ret -ne 0 ];then + echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2 + exit $ret + fi + + wait + ret=$? + + for file in $TMPFILE0 $TMPFILE1; do + last=$(tail -n1 "$file") + if [ x"$last" != x"$expected packets total" ]; then + echo "FAIL: Expected $expected packets total, but got $last" 1>&2 + cat "$file" 1>&2 + + ip netns exec ${nsrouter} nft list ruleset + exit 1 + fi + done + + echo "PASS: Expected and received $last" +} + +test_tcp_forward() +{ + ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & + local nfqpid=$! + + tmpfile=$(mktemp) || exit 1 + dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile + ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + sleep 1 + ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null & + + rm -f "$tmpfile" + + wait $rpid + wait $lpid + [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain" +} + +test_tcp_localhost() +{ + tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1% + + tmpfile=$(mktemp) || exit 1 + + dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile + ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & + local nfqpid=$! + + sleep 1 + ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null + rm -f "$tmpfile" + + wait $rpid + [ $? -eq 0 ] && echo "PASS: tcp via loopback" +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +load_ruleset "filter" 0 + +sleep 3 + +test_ping +ret=$? +if [ $ret -eq 0 ];then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_queue_blackhole ip +test_queue_blackhole ip6 + +# dummy ruleset to add base chains between the +# queueing rules. We don't want the second reinject +# to re-execute the old hooks. +load_counter_ruleset 10 + +# we are hooking all: prerouting/input/forward/output/postrouting. +# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so: +# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply). +# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply. +# so we expect that userspace program receives 10 packets. +test_queue 10 + +# same. We queue to a second program as well. +load_ruleset "filter2" 20 +test_queue 20 + +test_tcp_forward +test_tcp_localhost + +exit $ret -- cgit v1.2.3 From 0f8f554e5244f56f496b4ce30ada1126fe290345 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:31 -0300 Subject: selftests/powerpc: Don't rely on segfault to rerun the test The test case tm-signal-context-force-tm expects a segfault to happen on returning from signal handler, and then does a setcontext() to run the test again. However, the test doesn't always segfault, causing the test to run a single time. This patch fixes the test by putting it within a loop and jumping, via setcontext, just prior to the loop in case it segfaults. This way we get the desired behavior (run the test COUNT_MAX times) regardless if it segfaults or not. This also reduces the use of setcontext for control flow logic, keeping it only in the segfault handler. Also, since 'count' is changed within the signal handler, it is declared as volatile to prevent any compiler optimization getting confused with asynchronous changes. Signed-off-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-3-gustavold@linux.ibm.com --- .../powerpc/tm/tm-signal-context-force-tm.c | 74 ++++++++++------------ 1 file changed, 35 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 31717625f318..421cb082f6be 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -42,9 +42,10 @@ #endif /* Setting contexts because the test will crash and we want to recover */ -ucontext_t init_context, main_context; +ucontext_t init_context; -static int count, first_time; +/* count is changed in the signal handler, so it must be volatile */ +static volatile int count; void usr_signal_handler(int signo, siginfo_t *si, void *uc) { @@ -98,11 +99,6 @@ void usr_signal_handler(int signo, siginfo_t *si, void *uc) void seg_signal_handler(int signo, siginfo_t *si, void *uc) { - if (count == COUNT_MAX) { - /* Return to tm_signal_force_msr() and exit */ - setcontext(&main_context); - } - count++; /* Reexecute the test */ @@ -126,37 +122,41 @@ void tm_trap_test(void) */ getcontext(&init_context); - /* Allocated an alternative signal stack area */ - ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - ss.ss_size = SIGSTKSZ; - ss.ss_flags = 0; + while (count < COUNT_MAX) { + /* Allocated an alternative signal stack area */ + ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; - if (ss.ss_sp == (void *)-1) { - perror("mmap error\n"); - exit(-1); - } + if (ss.ss_sp == (void *)-1) { + perror("mmap error\n"); + exit(-1); + } - /* Force the allocation through a page fault */ - if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { - perror("madvise\n"); - exit(-1); - } + /* Force the allocation through a page fault */ + if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { + perror("madvise\n"); + exit(-1); + } - /* Setting an alternative stack to generate a page fault when - * the signal is raised. - */ - if (sigaltstack(&ss, NULL)) { - perror("sigaltstack\n"); - exit(-1); + /* + * Setting an alternative stack to generate a page fault when + * the signal is raised. + */ + if (sigaltstack(&ss, NULL)) { + perror("sigaltstack\n"); + exit(-1); + } + + /* The signal handler will enable MSR_TS */ + sigaction(SIGUSR1, &usr_sa, NULL); + /* If it does not crash, it might segfault, avoid it to retest */ + sigaction(SIGSEGV, &seg_sa, NULL); + + raise(SIGUSR1); + count++; } - - /* The signal handler will enable MSR_TS */ - sigaction(SIGUSR1, &usr_sa, NULL); - /* If it does not crash, it will segfault, avoid it to retest */ - sigaction(SIGSEGV, &seg_sa, NULL); - - raise(SIGUSR1); } int tm_signal_context_force_tm(void) @@ -169,11 +169,7 @@ int tm_signal_context_force_tm(void) */ SKIP_IF(!is_ppc64le()); - /* Will get back here after COUNT_MAX interactions */ - getcontext(&main_context); - - if (!first_time++) - tm_trap_test(); + tm_trap_test(); return EXIT_SUCCESS; } -- cgit v1.2.3 From 850507f30c38dff21ed557cb98ab16db26c32bbc Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Wed, 18 Mar 2020 14:00:04 +0800 Subject: selftests/powerpc: Turn off timeout setting for benchmarks, dscr, signal, tm Some specific tests in powerpc can take longer than the default 45 seconds that added in commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") to run, the following test result was collected across 2 Power8 nodes and 1 Power9 node in our pool: powerpc/benchmarks/futex_bench - 52s powerpc/dscr/dscr_sysfs_test - 116s powerpc/signal/signal_fuzzer - 88s powerpc/tm/tm_unavailable_test - 168s powerpc/tm/tm-poison - 240s Thus they will fail with TIMEOUT error. Disable the timeout setting for these sub-tests to allow them finish properly. https://bugs.launchpad.net/bugs/1864642 Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Signed-off-by: Po-Hsu Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200318060004.10685-1-po-hsu.lin@canonical.com --- tools/testing/selftests/powerpc/benchmarks/Makefile | 2 ++ tools/testing/selftests/powerpc/benchmarks/settings | 1 + tools/testing/selftests/powerpc/dscr/Makefile | 2 ++ tools/testing/selftests/powerpc/dscr/settings | 1 + tools/testing/selftests/powerpc/signal/Makefile | 2 ++ tools/testing/selftests/powerpc/signal/settings | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 ++ tools/testing/selftests/powerpc/tm/settings | 1 + 8 files changed, 12 insertions(+) create mode 100644 tools/testing/selftests/powerpc/benchmarks/settings create mode 100644 tools/testing/selftests/powerpc/dscr/settings create mode 100644 tools/testing/selftests/powerpc/signal/settings create mode 100644 tools/testing/selftests/powerpc/tm/settings (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index d40300a65b42..a32a6ab89914 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -2,6 +2,8 @@ TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall TEST_GEN_FILES := exec_target +TEST_FILES := settings + CFLAGS += -O2 top_srcdir = ../../../../.. diff --git a/tools/testing/selftests/powerpc/benchmarks/settings b/tools/testing/selftests/powerpc/benchmarks/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 5df476364b4d..cfa6eedcb66c 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ dscr_sysfs_thread_test +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 63b57583e07d..932a032bf036 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -5,6 +5,8 @@ CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/signal/settings b/tools/testing/selftests/powerpc/signal/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index b1d99736f8b8..0b0db8d3857c 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -7,6 +7,8 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ tm-signal-context-force-tm tm-poison tm-signal-pagefault +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/settings b/tools/testing/selftests/powerpc/tm/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/settings @@ -0,0 +1 @@ +timeout=0 -- cgit v1.2.3 From d1ee7e1f5c9191afb69ce46cc7752e4257340a31 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Thu, 12 Mar 2020 15:50:21 +0100 Subject: tools: gpio-hammer: Avoid potential overflow in main If '-o' was used more than 64 times in a single invocation of gpio-hammer, this could lead to an overflow of the 'lines' array. This commit fixes this by avoiding the overflow and giving a proper diagnostic back to the user Signed-off-by: Gabriel Ravier Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-hammer.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 0e0060a6eb34..083399d276e4 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -135,7 +135,14 @@ int main(int argc, char **argv) device_name = optarg; break; case 'o': - lines[i] = strtoul(optarg, NULL, 10); + /* + * Avoid overflow. Do not immediately error, we want to + * be able to accurately report on the amount of times + * '-o' was given to give an accurate error message + */ + if (i < GPIOHANDLES_MAX) + lines[i] = strtoul(optarg, NULL, 10); + i++; break; case '?': @@ -143,6 +150,14 @@ int main(int argc, char **argv) return -1; } } + + if (i >= GPIOHANDLES_MAX) { + fprintf(stderr, + "Only %d occurences of '-o' are allowed, %d were found\n", + GPIOHANDLES_MAX, i + 1); + return -1; + } + nlines = i; if (!device_name || !nlines) { -- cgit v1.2.3 From 55f17e2ae916e4443e5f0acf067a3b6e9180f1cd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 16 Mar 2020 09:23:40 +0000 Subject: tools: gpio-hammer: fix spelling mistake: "occurences" -> "occurrences" There is a spelling mistake in an error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-hammer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 083399d276e4..28d2329e83d2 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -153,7 +153,7 @@ int main(int argc, char **argv) if (i >= GPIOHANDLES_MAX) { fprintf(stderr, - "Only %d occurences of '-o' are allowed, %d were found\n", + "Only %d occurrences of '-o' are allowed, %d were found\n", GPIOHANDLES_MAX, i + 1); return -1; } -- cgit v1.2.3 From 1003bc16481a46fe5cecf99dcd3edbf48648d2a0 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Mon, 16 Mar 2020 20:50:48 +0100 Subject: tools: gpio-hammer: Apply scripts/Lindent and retain good changes "retain good changes" means that I left the help string split up instead of having this weird thing where it tries to merge together the last three lines and it looks **really** bad Signed-off-by: Gabriel Ravier Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-hammer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 28d2329e83d2..9fd926e8cb52 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -77,7 +77,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, fprintf(stdout, "[%c] ", swirr[j]); j++; - if (j == sizeof(swirr)-1) + if (j == sizeof(swirr) - 1) j = 0; fprintf(stdout, "["); -- cgit v1.2.3 From 97551625025320e9d635531625316e43c8ed62b0 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Sun, 22 Mar 2020 12:10:17 +0200 Subject: tools: gpio: Fix typo in gpio-utils Replace COMSUMER with proper CONSUMER Signed-off-by: Mykyta Poturai Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 53470de6a502..06003789e7c7 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -17,7 +17,7 @@ #include #include "gpio-utils.h" -#define COMSUMER "gpio-utils" +#define CONSUMER "gpio-utils" /** * doc: Operation of gpio @@ -209,7 +209,7 @@ int gpiotools_gets(const char *device_name, unsigned int *lines, ret = gpiotools_request_linehandle(device_name, lines, nlines, GPIOHANDLE_REQUEST_INPUT, data, - COMSUMER); + CONSUMER); if (ret < 0) return ret; @@ -259,7 +259,7 @@ int gpiotools_sets(const char *device_name, unsigned int *lines, ret = gpiotools_request_linehandle(device_name, lines, nlines, GPIOHANDLE_REQUEST_OUTPUT, data, - COMSUMER); + CONSUMER); if (ret < 0) return ret; -- cgit v1.2.3 From d198b34f3855eee2571dda03eea75a09c7c31480 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Mar 2020 22:35:59 +0900 Subject: .gitignore: add SPDX License Identifier Add SPDX License Identifier to all .gitignore files. Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- .gitignore | 1 + Documentation/.gitignore | 1 + Documentation/devicetree/bindings/.gitignore | 1 + Documentation/vm/.gitignore | 1 + arch/.gitignore | 1 + arch/alpha/kernel/.gitignore | 1 + arch/arc/boot/.gitignore | 1 + arch/arc/kernel/.gitignore | 1 + arch/arm/boot/.gitignore | 1 + arch/arm/boot/compressed/.gitignore | 1 + arch/arm/crypto/.gitignore | 1 + arch/arm/kernel/.gitignore | 1 + arch/arm/mach-at91/.gitignore | 1 + arch/arm/mach-omap2/.gitignore | 1 + arch/arm/vdso/.gitignore | 1 + arch/arm64/boot/.gitignore | 1 + arch/arm64/crypto/.gitignore | 1 + arch/arm64/kernel/.gitignore | 1 + arch/arm64/kernel/vdso/.gitignore | 1 + arch/arm64/kernel/vdso32/.gitignore | 1 + arch/ia64/kernel/.gitignore | 1 + arch/m68k/kernel/.gitignore | 1 + arch/microblaze/boot/.gitignore | 1 + arch/microblaze/kernel/.gitignore | 1 + arch/mips/boot/.gitignore | 1 + arch/mips/boot/compressed/.gitignore | 1 + arch/mips/boot/tools/.gitignore | 1 + arch/mips/kernel/.gitignore | 1 + arch/mips/tools/.gitignore | 1 + arch/mips/vdso/.gitignore | 1 + arch/nds32/kernel/.gitignore | 1 + arch/nds32/kernel/vdso/.gitignore | 1 + arch/nios2/boot/.gitignore | 1 + arch/nios2/kernel/.gitignore | 1 + arch/openrisc/kernel/.gitignore | 1 + arch/parisc/boot/.gitignore | 1 + arch/parisc/boot/compressed/.gitignore | 1 + arch/parisc/kernel/.gitignore | 1 + arch/powerpc/boot/.gitignore | 1 + arch/powerpc/kernel/.gitignore | 1 + arch/powerpc/kernel/vdso32/.gitignore | 1 + arch/powerpc/kernel/vdso64/.gitignore | 1 + arch/powerpc/platforms/cell/spufs/.gitignore | 1 + arch/powerpc/purgatory/.gitignore | 1 + arch/riscv/boot/.gitignore | 1 + arch/riscv/kernel/.gitignore | 1 + arch/riscv/kernel/vdso/.gitignore | 1 + arch/s390/boot/.gitignore | 1 + arch/s390/boot/compressed/.gitignore | 1 + arch/s390/kernel/.gitignore | 1 + arch/s390/kernel/vdso64/.gitignore | 1 + arch/s390/purgatory/.gitignore | 1 + arch/s390/tools/.gitignore | 1 + arch/sh/boot/.gitignore | 1 + arch/sh/boot/compressed/.gitignore | 1 + arch/sh/kernel/.gitignore | 1 + arch/sh/kernel/vsyscall/.gitignore | 1 + arch/sparc/boot/.gitignore | 1 + arch/sparc/kernel/.gitignore | 1 + arch/sparc/vdso/.gitignore | 1 + arch/sparc/vdso/vdso32/.gitignore | 1 + arch/um/.gitignore | 1 + arch/unicore32/.gitignore | 1 + arch/x86/.gitignore | 1 + arch/x86/boot/.gitignore | 1 + arch/x86/boot/compressed/.gitignore | 1 + arch/x86/boot/tools/.gitignore | 1 + arch/x86/crypto/.gitignore | 1 + arch/x86/entry/vdso/.gitignore | 1 + arch/x86/entry/vdso/vdso32/.gitignore | 1 + arch/x86/kernel/.gitignore | 1 + arch/x86/kernel/cpu/.gitignore | 1 + arch/x86/lib/.gitignore | 1 + arch/x86/realmode/rm/.gitignore | 1 + arch/x86/tools/.gitignore | 1 + arch/x86/um/vdso/.gitignore | 1 + arch/xtensa/boot/.gitignore | 1 + arch/xtensa/boot/boot-elf/.gitignore | 1 + arch/xtensa/boot/lib/.gitignore | 1 + arch/xtensa/kernel/.gitignore | 1 + certs/.gitignore | 1 + drivers/atm/.gitignore | 1 + drivers/crypto/vmx/.gitignore | 1 + drivers/eisa/.gitignore | 1 + drivers/gpu/drm/i915/.gitignore | 1 + drivers/gpu/drm/radeon/.gitignore | 1 + drivers/memory/.gitignore | 1 + drivers/net/wan/.gitignore | 1 + drivers/scsi/.gitignore | 1 + drivers/scsi/aic7xxx/.gitignore | 1 + drivers/staging/comedi/drivers/ni_routing/tools/.gitignore | 1 + drivers/staging/greybus/tools/.gitignore | 1 + drivers/video/logo/.gitignore | 1 + drivers/zorro/.gitignore | 1 + fs/unicode/.gitignore | 1 + kernel/.gitignore | 1 + kernel/debug/kdb/.gitignore | 1 + lib/.gitignore | 1 + lib/raid6/.gitignore | 1 + net/bpfilter/.gitignore | 1 + net/wireless/.gitignore | 1 + samples/auxdisplay/.gitignore | 1 + samples/bpf/.gitignore | 1 + samples/connector/.gitignore | 1 + samples/hidraw/.gitignore | 1 + samples/mei/.gitignore | 1 + samples/mic/mpssd/.gitignore | 1 + samples/pidfd/.gitignore | 1 + samples/seccomp/.gitignore | 1 + samples/timers/.gitignore | 1 + samples/vfs/.gitignore | 1 + samples/watchdog/.gitignore | 1 + scripts/.gitignore | 1 + scripts/basic/.gitignore | 1 + scripts/dtc/.gitignore | 1 + scripts/gcc-plugins/.gitignore | 1 + scripts/gdb/linux/.gitignore | 1 + scripts/genksyms/.gitignore | 1 + scripts/kconfig/.gitignore | 1 + scripts/mod/.gitignore | 1 + scripts/selinux/genheaders/.gitignore | 1 + scripts/selinux/mdp/.gitignore | 1 + security/apparmor/.gitignore | 1 + security/selinux/.gitignore | 1 + security/tomoyo/.gitignore | 1 + sound/oss/.gitignore | 1 + tools/accounting/.gitignore | 1 + tools/bootconfig/.gitignore | 1 + tools/bpf/.gitignore | 1 + tools/bpf/bpftool/.gitignore | 1 + tools/bpf/runqslower/.gitignore | 1 + tools/build/.gitignore | 1 + tools/build/feature/.gitignore | 1 + tools/cgroup/.gitignore | 1 + tools/gpio/.gitignore | 1 + tools/iio/.gitignore | 1 + tools/laptop/dslm/.gitignore | 1 + tools/leds/.gitignore | 1 + tools/lib/bpf/.gitignore | 1 + tools/lib/lockdep/.gitignore | 1 + tools/lib/traceevent/.gitignore | 1 + tools/memory-model/.gitignore | 1 + tools/memory-model/litmus-tests/.gitignore | 1 + tools/objtool/.gitignore | 1 + tools/pcmcia/.gitignore | 1 + tools/perf/.gitignore | 1 + tools/perf/tests/.gitignore | 1 + tools/power/acpi/.gitignore | 1 + tools/power/cpupower/.gitignore | 1 + tools/power/x86/intel-speed-select/.gitignore | 1 + tools/power/x86/turbostat/.gitignore | 1 + tools/spi/.gitignore | 1 + tools/testing/kunit/.gitignore | 1 + tools/testing/radix-tree/.gitignore | 1 + tools/testing/selftests/.gitignore | 1 + tools/testing/selftests/android/ion/.gitignore | 1 + tools/testing/selftests/arm64/signal/.gitignore | 1 + tools/testing/selftests/arm64/tags/.gitignore | 1 + tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/map_tests/.gitignore | 1 + tools/testing/selftests/bpf/prog_tests/.gitignore | 1 + tools/testing/selftests/bpf/verifier/.gitignore | 1 + tools/testing/selftests/breakpoints/.gitignore | 1 + tools/testing/selftests/capabilities/.gitignore | 1 + tools/testing/selftests/cgroup/.gitignore | 1 + tools/testing/selftests/clone3/.gitignore | 1 + tools/testing/selftests/drivers/.gitignore | 1 + tools/testing/selftests/efivarfs/.gitignore | 1 + tools/testing/selftests/exec/.gitignore | 1 + tools/testing/selftests/filesystems/.gitignore | 1 + tools/testing/selftests/filesystems/binderfs/.gitignore | 1 + tools/testing/selftests/filesystems/epoll/.gitignore | 1 + tools/testing/selftests/ftrace/.gitignore | 1 + tools/testing/selftests/futex/functional/.gitignore | 1 + tools/testing/selftests/gpio/.gitignore | 1 + tools/testing/selftests/ia64/.gitignore | 1 + tools/testing/selftests/intel_pstate/.gitignore | 1 + tools/testing/selftests/ipc/.gitignore | 1 + tools/testing/selftests/ir/.gitignore | 1 + tools/testing/selftests/kcmp/.gitignore | 1 + tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/media_tests/.gitignore | 1 + tools/testing/selftests/membarrier/.gitignore | 1 + tools/testing/selftests/memfd/.gitignore | 1 + tools/testing/selftests/mount/.gitignore | 1 + tools/testing/selftests/mqueue/.gitignore | 1 + tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/forwarding/.gitignore | 1 + tools/testing/selftests/net/mptcp/.gitignore | 1 + tools/testing/selftests/networking/timestamping/.gitignore | 1 + tools/testing/selftests/nsfs/.gitignore | 1 + tools/testing/selftests/openat2/.gitignore | 1 + tools/testing/selftests/pidfd/.gitignore | 1 + tools/testing/selftests/powerpc/alignment/.gitignore | 1 + tools/testing/selftests/powerpc/benchmarks/.gitignore | 1 + tools/testing/selftests/powerpc/cache_shape/.gitignore | 1 + tools/testing/selftests/powerpc/copyloops/.gitignore | 1 + tools/testing/selftests/powerpc/dscr/.gitignore | 1 + tools/testing/selftests/powerpc/math/.gitignore | 1 + tools/testing/selftests/powerpc/mm/.gitignore | 1 + tools/testing/selftests/powerpc/pmu/.gitignore | 1 + tools/testing/selftests/powerpc/pmu/ebb/.gitignore | 1 + tools/testing/selftests/powerpc/primitives/.gitignore | 1 + tools/testing/selftests/powerpc/ptrace/.gitignore | 1 + tools/testing/selftests/powerpc/security/.gitignore | 1 + tools/testing/selftests/powerpc/signal/.gitignore | 1 + tools/testing/selftests/powerpc/stringloops/.gitignore | 1 + tools/testing/selftests/powerpc/switch_endian/.gitignore | 1 + tools/testing/selftests/powerpc/syscalls/.gitignore | 1 + tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/vphn/.gitignore | 1 + tools/testing/selftests/prctl/.gitignore | 1 + tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/pstore/.gitignore | 1 + tools/testing/selftests/ptp/.gitignore | 1 + tools/testing/selftests/ptrace/.gitignore | 1 + tools/testing/selftests/rcutorture/.gitignore | 1 + tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore | 1 + .../selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore | 1 + .../rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore | 1 + tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rtc/.gitignore | 1 + tools/testing/selftests/safesetid/.gitignore | 1 + tools/testing/selftests/seccomp/.gitignore | 1 + tools/testing/selftests/sigaltstack/.gitignore | 1 + tools/testing/selftests/size/.gitignore | 1 + tools/testing/selftests/sparc64/drivers/.gitignore | 1 + tools/testing/selftests/splice/.gitignore | 1 + tools/testing/selftests/sync/.gitignore | 1 + tools/testing/selftests/tc-testing/.gitignore | 1 + tools/testing/selftests/timens/.gitignore | 1 + tools/testing/selftests/timers/.gitignore | 1 + tools/testing/selftests/tmpfs/.gitignore | 1 + tools/testing/selftests/vDSO/.gitignore | 1 + tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/watchdog/.gitignore | 1 + tools/testing/selftests/wireguard/qemu/.gitignore | 1 + tools/testing/selftests/x86/.gitignore | 1 + tools/testing/vsock/.gitignore | 1 + tools/thermal/tmon/.gitignore | 1 + tools/usb/.gitignore | 1 + tools/usb/usbip/.gitignore | 1 + tools/virtio/.gitignore | 1 + tools/vm/.gitignore | 1 + usr/.gitignore | 1 + usr/include/.gitignore | 1 + 246 files changed, 246 insertions(+) (limited to 'tools') diff --git a/.gitignore b/.gitignore index 72ef86a5570d..2258e906f01c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # NOTE! Don't add files that are generated in specific # subdirectories here. Add them in the ".gitignore" file diff --git a/Documentation/.gitignore b/Documentation/.gitignore index e74fec8693b2..d6dc7c9b8e25 100644 --- a/Documentation/.gitignore +++ b/Documentation/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only output *.pyc diff --git a/Documentation/devicetree/bindings/.gitignore b/Documentation/devicetree/bindings/.gitignore index ef82fcfcccab..66878559822b 100644 --- a/Documentation/devicetree/bindings/.gitignore +++ b/Documentation/devicetree/bindings/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only *.example.dts processed-schema.yaml diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore index 09b164a5700f..bc74f5643008 100644 --- a/Documentation/vm/.gitignore +++ b/Documentation/vm/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only page-types slabinfo diff --git a/arch/.gitignore b/arch/.gitignore index 741468920320..4191da401dbb 100644 --- a/arch/.gitignore +++ b/arch/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only i386 x86_64 diff --git a/arch/alpha/kernel/.gitignore b/arch/alpha/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/alpha/kernel/.gitignore +++ b/arch/alpha/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore index c4c5fd529c25..675db1494028 100644 --- a/arch/arc/boot/.gitignore +++ b/arch/arc/boot/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only uImage diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/arc/kernel/.gitignore +++ b/arch/arc/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/arm/boot/.gitignore b/arch/arm/boot/.gitignore index ce1c5ff746e7..8c759326baf4 100644 --- a/arch/arm/boot/.gitignore +++ b/arch/arm/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only Image zImage xipImage diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index 86b2f5d28240..db05c6ef3e31 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ashldi3.S bswapsdi2.S font.c diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore index 31e1f538df7d..790e204050ba 100644 --- a/arch/arm/crypto/.gitignore +++ b/arch/arm/crypto/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only aesbs-core.S sha256-core.S sha512-core.S diff --git a/arch/arm/kernel/.gitignore b/arch/arm/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/arm/kernel/.gitignore +++ b/arch/arm/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/arm/mach-at91/.gitignore b/arch/arm/mach-at91/.gitignore index 2ecd6f51c8a9..f6d47389675e 100644 --- a/arch/arm/mach-at91/.gitignore +++ b/arch/arm/mach-at91/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only pm_data-offsets.h diff --git a/arch/arm/mach-omap2/.gitignore b/arch/arm/mach-omap2/.gitignore index 79a8d6ea7152..dc7be7556736 100644 --- a/arch/arm/mach-omap2/.gitignore +++ b/arch/arm/mach-omap2/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only pm-asm-offsets.h diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore index 6b47f6e0b032..dfa06f5365cf 100644 --- a/arch/arm/vdso/.gitignore +++ b/arch/arm/vdso/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds vdso.so.raw vdsomunge diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb6ae66..9a7a9009d43a 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only Image Image.gz diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore index 879df8781ed5..a11a86217ffb 100644 --- a/arch/arm64/crypto/.gitignore +++ b/arch/arm64/crypto/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only sha256-core.S sha512-core.S diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/arm64/kernel/.gitignore +++ b/arch/arm64/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore index f8b69d84238e..652e31d82582 100644 --- a/arch/arm64/kernel/vdso/.gitignore +++ b/arch/arm64/kernel/vdso/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore index 4fea950fa5ed..3542fa24e26b 100644 --- a/arch/arm64/kernel/vdso32/.gitignore +++ b/arch/arm64/kernel/vdso32/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds vdso.so.raw diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore index 21cb0da5ded8..0374827206e7 100644 --- a/arch/ia64/kernel/.gitignore +++ b/arch/ia64/kernel/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only gate.lds vmlinux.lds diff --git a/arch/m68k/kernel/.gitignore b/arch/m68k/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/m68k/kernel/.gitignore +++ b/arch/m68k/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/microblaze/boot/.gitignore b/arch/microblaze/boot/.gitignore index 679502d64a97..11a9e229f3c0 100644 --- a/arch/microblaze/boot/.gitignore +++ b/arch/microblaze/boot/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only linux.bin* simpleImage.* diff --git a/arch/microblaze/kernel/.gitignore b/arch/microblaze/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/microblaze/kernel/.gitignore +++ b/arch/microblaze/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/mips/boot/.gitignore b/arch/mips/boot/.gitignore index a73d6e2c4f64..2adc8581a175 100644 --- a/arch/mips/boot/.gitignore +++ b/arch/mips/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only mkboot elf2ecoff vmlinux.* diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore index ebae133f1d00..d358395614c9 100644 --- a/arch/mips/boot/compressed/.gitignore +++ b/arch/mips/boot/compressed/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only ashldi3.c bswapsi.c diff --git a/arch/mips/boot/tools/.gitignore b/arch/mips/boot/tools/.gitignore index be0ed065249b..d36dc7cf9115 100644 --- a/arch/mips/boot/tools/.gitignore +++ b/arch/mips/boot/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only relocs diff --git a/arch/mips/kernel/.gitignore b/arch/mips/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/mips/kernel/.gitignore +++ b/arch/mips/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore index b0209450d9ff..794817dfb389 100644 --- a/arch/mips/tools/.gitignore +++ b/arch/mips/tools/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only elf-entry loongson3-llsc-check diff --git a/arch/mips/vdso/.gitignore b/arch/mips/vdso/.gitignore index 5286a7d73d79..1f43f6dd8142 100644 --- a/arch/mips/vdso/.gitignore +++ b/arch/mips/vdso/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.so* vdso-*image.c genvdso diff --git a/arch/nds32/kernel/.gitignore b/arch/nds32/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/nds32/kernel/.gitignore +++ b/arch/nds32/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/nds32/kernel/vdso/.gitignore b/arch/nds32/kernel/vdso/.gitignore index f8b69d84238e..652e31d82582 100644 --- a/arch/nds32/kernel/vdso/.gitignore +++ b/arch/nds32/kernel/vdso/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds diff --git a/arch/nios2/boot/.gitignore b/arch/nios2/boot/.gitignore index 64386a8dedd8..ef37cac5bcc0 100644 --- a/arch/nios2/boot/.gitignore +++ b/arch/nios2/boot/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmImage diff --git a/arch/nios2/kernel/.gitignore b/arch/nios2/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/nios2/kernel/.gitignore +++ b/arch/nios2/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/openrisc/kernel/.gitignore +++ b/arch/openrisc/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/parisc/boot/.gitignore b/arch/parisc/boot/.gitignore index 017d5912ad2d..adf2ae0e7eda 100644 --- a/arch/parisc/boot/.gitignore +++ b/arch/parisc/boot/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only image bzImage diff --git a/arch/parisc/boot/compressed/.gitignore b/arch/parisc/boot/compressed/.gitignore index 926cd41c1069..b9853a356ab2 100644 --- a/arch/parisc/boot/compressed/.gitignore +++ b/arch/parisc/boot/compressed/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only firmware.c real2.S sizes.h diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/parisc/kernel/.gitignore +++ b/arch/parisc/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 6610665fcf5e..1eee61b82341 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only addnote decompress_inflate.c empty.c diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore index 67ebd3003c05..d71179d3ffe9 100644 --- a/arch/powerpc/kernel/.gitignore +++ b/arch/powerpc/kernel/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only prom_init_check vmlinux.lds diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso32/.gitignore index fea5809857a5..824b863ec6bd 100644 --- a/arch/powerpc/kernel/vdso32/.gitignore +++ b/arch/powerpc/kernel/vdso32/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso32.lds vdso32.so.dbg diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore index 77a0b423642c..84151a7ba31d 100644 --- a/arch/powerpc/kernel/vdso64/.gitignore +++ b/arch/powerpc/kernel/vdso64/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso64.lds vdso64.so.dbg diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore index a09ee8d84d6c..5f3eb224f653 100644 --- a/arch/powerpc/platforms/cell/spufs/.gitignore +++ b/arch/powerpc/platforms/cell/spufs/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only spu_save_dump.h spu_restore_dump.h diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore index e9e66f178a6d..b8dc6ff34254 100644 --- a/arch/powerpc/purgatory/.gitignore +++ b/arch/powerpc/purgatory/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only kexec-purgatory.c purgatory.ro diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 8a45a37d2af4..574c10f8ff68 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only Image Image.gz loader diff --git a/arch/riscv/kernel/.gitignore b/arch/riscv/kernel/.gitignore index b51634f6a7cd..e052ed331cc1 100644 --- a/arch/riscv/kernel/.gitignore +++ b/arch/riscv/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /vmlinux.lds diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 97c2d69d0289..11ebee9e4c1d 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds *.tmp diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index 16ff906e4610..b265bfede188 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only image bzImage section_cmp.* diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index e72fcd7ecebb..765a08f1bd77 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux vmlinux.lds diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/s390/kernel/.gitignore +++ b/arch/s390/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore index 3fd18cf9fec2..4ec80685fecc 100644 --- a/arch/s390/kernel/vdso64/.gitignore +++ b/arch/s390/kernel/vdso64/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso64.lds diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index c82157f46b18..97ca52779457 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only purgatory purgatory.chk purgatory.lds diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore index 71bd6f8eebaf..ea62f37b79ef 100644 --- a/arch/s390/tools/.gitignore +++ b/arch/s390/tools/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only gen_facilities gen_opcode_table diff --git a/arch/sh/boot/.gitignore b/arch/sh/boot/.gitignore index f50fdd9975c5..6603bbbc917d 100644 --- a/arch/sh/boot/.gitignore +++ b/arch/sh/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only zImage vmlinux* uImage* diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore index edff113f1b85..37aa53057369 100644 --- a/arch/sh/boot/compressed/.gitignore +++ b/arch/sh/boot/compressed/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ashiftrt.S ashldi3.c ashlsi3.S diff --git a/arch/sh/kernel/.gitignore b/arch/sh/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/sh/kernel/.gitignore +++ b/arch/sh/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/sh/kernel/vsyscall/.gitignore b/arch/sh/kernel/vsyscall/.gitignore index 40836ad9079c..530a3031a88d 100644 --- a/arch/sh/kernel/vsyscall/.gitignore +++ b/arch/sh/kernel/vsyscall/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vsyscall.lds diff --git a/arch/sparc/boot/.gitignore b/arch/sparc/boot/.gitignore index fc6f3986c76c..f3d8569a21d1 100644 --- a/arch/sparc/boot/.gitignore +++ b/arch/sparc/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only btfix.S btfixupprep image diff --git a/arch/sparc/kernel/.gitignore b/arch/sparc/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/sparc/kernel/.gitignore +++ b/arch/sparc/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore index ef925b998222..8d4ebc990bf3 100644 --- a/arch/sparc/vdso/.gitignore +++ b/arch/sparc/vdso/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds vdso-image-*.c vdso2c diff --git a/arch/sparc/vdso/vdso32/.gitignore b/arch/sparc/vdso/vdso32/.gitignore index e45fba9d0ced..5167384843b9 100644 --- a/arch/sparc/vdso/vdso32/.gitignore +++ b/arch/sparc/vdso/vdso32/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso32.lds diff --git a/arch/um/.gitignore b/arch/um/.gitignore index a73d3a1cc746..6323e5571887 100644 --- a/arch/um/.gitignore +++ b/arch/um/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only kernel/config.c kernel/config.tmp kernel/vmlinux.lds diff --git a/arch/unicore32/.gitignore b/arch/unicore32/.gitignore index 947e99c2a957..e82f3fb57ba0 100644 --- a/arch/unicore32/.gitignore +++ b/arch/unicore32/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Generated include files # diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index 5a82bac5e0bc..677111acbaa3 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only boot/compressed/vmlinux tools/test_get_len tools/insn_sanity diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 09d25dd09307..9cc7f1357b9b 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only bootsect bzImage cpustr.h diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index 4a46fab7162e..25805199a506 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only relocs vmlinux.bin.all vmlinux.relocs diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore index 378eac25d311..ae91f4d0d78b 100644 --- a/arch/x86/boot/tools/.gitignore +++ b/arch/x86/boot/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only build diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore index 30be0400a439..580c839bb177 100644 --- a/arch/x86/crypto/.gitignore +++ b/arch/x86/crypto/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only poly1305-x86_64-cryptogams.S diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore index aae8ffdd5880..37a6129d597b 100644 --- a/arch/x86/entry/vdso/.gitignore +++ b/arch/x86/entry/vdso/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds vdsox32.lds vdso32-syscall-syms.lds diff --git a/arch/x86/entry/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore index e45fba9d0ced..5167384843b9 100644 --- a/arch/x86/entry/vdso/vdso32/.gitignore +++ b/arch/x86/entry/vdso/vdso32/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso32.lds diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore index 08f4fd731469..ef66569e7e22 100644 --- a/arch/x86/kernel/.gitignore +++ b/arch/x86/kernel/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vsyscall.lds vsyscall_32.lds vmlinux.lds diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore index 667df55a4399..0bca7ef7426a 100644 --- a/arch/x86/kernel/cpu/.gitignore +++ b/arch/x86/kernel/cpu/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only capflags.c diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore index 8df89f0a3fe6..8ae0f93ecbfd 100644 --- a/arch/x86/lib/.gitignore +++ b/arch/x86/lib/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only inat-tables.c diff --git a/arch/x86/realmode/rm/.gitignore b/arch/x86/realmode/rm/.gitignore index b6ed3a2555cb..6c3464f46166 100644 --- a/arch/x86/realmode/rm/.gitignore +++ b/arch/x86/realmode/rm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only pasyms.h realmode.lds realmode.relocs diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore index be0ed065249b..d36dc7cf9115 100644 --- a/arch/x86/tools/.gitignore +++ b/arch/x86/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only relocs diff --git a/arch/x86/um/vdso/.gitignore b/arch/x86/um/vdso/.gitignore index f8b69d84238e..652e31d82582 100644 --- a/arch/x86/um/vdso/.gitignore +++ b/arch/x86/um/vdso/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds diff --git a/arch/xtensa/boot/.gitignore b/arch/xtensa/boot/.gitignore index 38177c7ebcab..615f1f741a03 100644 --- a/arch/xtensa/boot/.gitignore +++ b/arch/xtensa/boot/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only uImage zImage.redboot diff --git a/arch/xtensa/boot/boot-elf/.gitignore b/arch/xtensa/boot/boot-elf/.gitignore index 5ff8fbb8561b..7473404500cc 100644 --- a/arch/xtensa/boot/boot-elf/.gitignore +++ b/arch/xtensa/boot/boot-elf/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only boot.lds diff --git a/arch/xtensa/boot/lib/.gitignore b/arch/xtensa/boot/lib/.gitignore index 1629a6167755..805a8249252a 100644 --- a/arch/xtensa/boot/lib/.gitignore +++ b/arch/xtensa/boot/lib/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only inffast.c inflate.c inftrees.c diff --git a/arch/xtensa/kernel/.gitignore b/arch/xtensa/kernel/.gitignore index c5f676c3c224..bbb90f92d051 100644 --- a/arch/xtensa/kernel/.gitignore +++ b/arch/xtensa/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vmlinux.lds diff --git a/certs/.gitignore b/certs/.gitignore index 4d58ba042b37..2a2483990686 100644 --- a/certs/.gitignore +++ b/certs/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only x509_certificate_list diff --git a/drivers/atm/.gitignore b/drivers/atm/.gitignore index 19f3ffbd1d65..ddd374e91965 100644 --- a/drivers/atm/.gitignore +++ b/drivers/atm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only fore200e_mkfirm fore200e_pca_fw.c pca200e.bin diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore index af4a7ce4738d..7aa71d83f739 100644 --- a/drivers/crypto/vmx/.gitignore +++ b/drivers/crypto/vmx/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/eisa/.gitignore b/drivers/eisa/.gitignore index 4b335c0aedb0..7d0a2ad5abe2 100644 --- a/drivers/eisa/.gitignore +++ b/drivers/eisa/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only devlist.h diff --git a/drivers/gpu/drm/i915/.gitignore b/drivers/gpu/drm/i915/.gitignore index d9a77f3b59b2..81972dce1aff 100644 --- a/drivers/gpu/drm/i915/.gitignore +++ b/drivers/gpu/drm/i915/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only *.hdrtest diff --git a/drivers/gpu/drm/radeon/.gitignore b/drivers/gpu/drm/radeon/.gitignore index 403eb3a5891f..9c1a94153983 100644 --- a/drivers/gpu/drm/radeon/.gitignore +++ b/drivers/gpu/drm/radeon/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only mkregtable *_reg_safe.h diff --git a/drivers/memory/.gitignore b/drivers/memory/.gitignore index cbca8b028437..caedc4c7d2db 100644 --- a/drivers/memory/.gitignore +++ b/drivers/memory/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only ti-emif-asm-offsets.h diff --git a/drivers/net/wan/.gitignore b/drivers/net/wan/.gitignore index dae3ea6bb18c..247bfbf10912 100644 --- a/drivers/net/wan/.gitignore +++ b/drivers/net/wan/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only wanxlfw.inc diff --git a/drivers/scsi/.gitignore b/drivers/scsi/.gitignore index e2956741fbd1..5f65cb75f534 100644 --- a/drivers/scsi/.gitignore +++ b/drivers/scsi/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only 53c700_d.h scsi_devinfo_tbl.c diff --git a/drivers/scsi/aic7xxx/.gitignore b/drivers/scsi/aic7xxx/.gitignore index b8ee24d5748a..9aa780221718 100644 --- a/drivers/scsi/aic7xxx/.gitignore +++ b/drivers/scsi/aic7xxx/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only aic79xx_reg.h aic79xx_reg_print.c aic79xx_seq.h diff --git a/drivers/staging/comedi/drivers/ni_routing/tools/.gitignore b/drivers/staging/comedi/drivers/ni_routing/tools/.gitignore index ef38008280a9..e3ebffcd900e 100644 --- a/drivers/staging/comedi/drivers/ni_routing/tools/.gitignore +++ b/drivers/staging/comedi/drivers/ni_routing/tools/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only comedi_h.py *.pyc ni_values.py diff --git a/drivers/staging/greybus/tools/.gitignore b/drivers/staging/greybus/tools/.gitignore index 023654c83068..1fd364aba774 100644 --- a/drivers/staging/greybus/tools/.gitignore +++ b/drivers/staging/greybus/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only loopback_test diff --git a/drivers/video/logo/.gitignore b/drivers/video/logo/.gitignore index 1551a75afdbd..5311d207c0b9 100644 --- a/drivers/video/logo/.gitignore +++ b/drivers/video/logo/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *_mono.c *_vga16.c *_clut224.c diff --git a/drivers/zorro/.gitignore b/drivers/zorro/.gitignore index 34f980bd8ff6..acd6ffb8d77d 100644 --- a/drivers/zorro/.gitignore +++ b/drivers/zorro/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only devlist.h gen-devlist diff --git a/fs/unicode/.gitignore b/fs/unicode/.gitignore index 0381e2221480..9b2467e77b2d 100644 --- a/fs/unicode/.gitignore +++ b/fs/unicode/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only mkutf8data utf8data.h diff --git a/kernel/.gitignore b/kernel/.gitignore index 0a423a3ca2e1..78701ea37c97 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only kheaders.md5 timeconst.h hz.bc diff --git a/kernel/debug/kdb/.gitignore b/kernel/debug/kdb/.gitignore index 396d12eda9e8..df259542a236 100644 --- a/kernel/debug/kdb/.gitignore +++ b/kernel/debug/kdb/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only gen-kdb_cmds.c diff --git a/lib/.gitignore b/lib/.gitignore index 9af73655a239..327cb2c7f2c9 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only gen_crc32table gen_crc64table crc32table.h diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 3de0d8921286..6be57745afd1 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only mktables altivec*.c int*.c diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore index e97084e3eea2..f34e85ee8204 100644 --- a/net/bpfilter/.gitignore +++ b/net/bpfilter/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only bpfilter_umh diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore index 61cbc304a3d3..1a29cd69d6cf 100644 --- a/net/wireless/.gitignore +++ b/net/wireless/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only shipped-certs.c extra-certs.c diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore index 7af222860a96..2ed744c0e741 100644 --- a/samples/auxdisplay/.gitignore +++ b/samples/auxdisplay/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only cfag12864b-example diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 74d31fd3c99c..23837f2ed458 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only cpustat fds_example hbm diff --git a/samples/connector/.gitignore b/samples/connector/.gitignore index d2b9c32accd4..d86f2ff9c947 100644 --- a/samples/connector/.gitignore +++ b/samples/connector/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only ucon diff --git a/samples/hidraw/.gitignore b/samples/hidraw/.gitignore index 05e51a685242..d7a6074ebcf9 100644 --- a/samples/hidraw/.gitignore +++ b/samples/hidraw/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only hid-example diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore index f356b81ca1ec..db5e802f041e 100644 --- a/samples/mei/.gitignore +++ b/samples/mei/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only mei-amt-version diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore index 8b7c72f07c92..aa03f1eb37a0 100644 --- a/samples/mic/mpssd/.gitignore +++ b/samples/mic/mpssd/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only mpssd diff --git a/samples/pidfd/.gitignore b/samples/pidfd/.gitignore index be52b3ba6e4b..eea857fca736 100644 --- a/samples/pidfd/.gitignore +++ b/samples/pidfd/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only pidfd-metadata diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore index d1e2e817d556..4a5a5b7db30b 100644 --- a/samples/seccomp/.gitignore +++ b/samples/seccomp/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only bpf-direct bpf-fancy dropper diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore index c5c45d7ec0df..40510c33cf08 100644 --- a/samples/timers/.gitignore +++ b/samples/timers/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only hpet_example diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore index 0806eb0be62d..8fdabf7e5373 100644 --- a/samples/vfs/.gitignore +++ b/samples/vfs/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only test-fsmount test-statx diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore index ff0ebb540333..74153b831244 100644 --- a/samples/watchdog/.gitignore +++ b/samples/watchdog/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only watchdog-simple diff --git a/scripts/.gitignore b/scripts/.gitignore index 9fe29efbcb95..0d1c8e217cd7 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only bin2c kallsyms unifdef diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore index a776371a3502..98ae1f509592 100644 --- a/scripts/basic/.gitignore +++ b/scripts/basic/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only fixdep diff --git a/scripts/dtc/.gitignore b/scripts/dtc/.gitignore index 2e6e60d64ede..b814e6076bdb 100644 --- a/scripts/dtc/.gitignore +++ b/scripts/dtc/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only dtc diff --git a/scripts/gcc-plugins/.gitignore b/scripts/gcc-plugins/.gitignore index de92ed9e3d83..b04e0f0f033e 100644 --- a/scripts/gcc-plugins/.gitignore +++ b/scripts/gcc-plugins/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only randomize_layout_seed.h diff --git a/scripts/gdb/linux/.gitignore b/scripts/gdb/linux/.gitignore index 2573543842d0..43234cbcb529 100644 --- a/scripts/gdb/linux/.gitignore +++ b/scripts/gdb/linux/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.pyc *.pyo constants.py diff --git a/scripts/genksyms/.gitignore b/scripts/genksyms/.gitignore index b119c7da2863..999af710f83d 100644 --- a/scripts/genksyms/.gitignore +++ b/scripts/genksyms/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only genksyms diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore index 588988711e07..12a67fdab541 100644 --- a/scripts/kconfig/.gitignore +++ b/scripts/kconfig/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.moc *conf-cfg diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore index 3bd11b603173..07e4a39f90a6 100644 --- a/scripts/mod/.gitignore +++ b/scripts/mod/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only elfconfig.h mk_elfconfig modpost diff --git a/scripts/selinux/genheaders/.gitignore b/scripts/selinux/genheaders/.gitignore index 4c0b646ff8d5..5fcadd307908 100644 --- a/scripts/selinux/genheaders/.gitignore +++ b/scripts/selinux/genheaders/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only genheaders diff --git a/scripts/selinux/mdp/.gitignore b/scripts/selinux/mdp/.gitignore index 0d9f827dc14b..a7482287e77f 100644 --- a/scripts/selinux/mdp/.gitignore +++ b/scripts/selinux/mdp/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only mdp diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index 0ace1d1dec44..6d1eb1c15c18 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only net_names.h capability_names.h rlim_names.h diff --git a/security/selinux/.gitignore b/security/selinux/.gitignore index 2e5040a3d48b..168fae13ca5a 100644 --- a/security/selinux/.gitignore +++ b/security/selinux/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only av_permissions.h flask.h diff --git a/security/tomoyo/.gitignore b/security/tomoyo/.gitignore index dc0f220a210b..9f300cdce362 100644 --- a/security/tomoyo/.gitignore +++ b/security/tomoyo/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only builtin-policy.h policy/*.conf diff --git a/sound/oss/.gitignore b/sound/oss/.gitignore index 8fd8fd3eff62..ac678430408b 100644 --- a/sound/oss/.gitignore +++ b/sound/oss/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only pss_boot.h trix_boot.h diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore index 86485203c4ae..c45fb4ed4309 100644 --- a/tools/accounting/.gitignore +++ b/tools/accounting/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only getdelays diff --git a/tools/bootconfig/.gitignore b/tools/bootconfig/.gitignore index e7644dfaa4a7..b77513cae685 100644 --- a/tools/bootconfig/.gitignore +++ b/tools/bootconfig/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only bootconfig diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore index 59024197e71d..cf53342175e7 100644 --- a/tools/bpf/.gitignore +++ b/tools/bpf/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only FEATURE-DUMP.bpf feature bpf_asm diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index b13926432b84..5c232659a98b 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.d /bpftool bpftool*.8 diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore index 90a456a2a72f..ffdb70230c8b 100644 --- a/tools/bpf/runqslower/.gitignore +++ b/tools/bpf/runqslower/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /.output diff --git a/tools/build/.gitignore b/tools/build/.gitignore index a776371a3502..98ae1f509592 100644 --- a/tools/build/.gitignore +++ b/tools/build/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only fixdep diff --git a/tools/build/feature/.gitignore b/tools/build/feature/.gitignore index 09b335b98842..15fcd34acdb9 100644 --- a/tools/build/feature/.gitignore +++ b/tools/build/feature/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.d *.bin *.output diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore index 633cd9b874f9..46a82775f2ca 100644 --- a/tools/cgroup/.gitignore +++ b/tools/cgroup/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only cgroup_event_listener diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore index a94c0e83b209..d0a66c48865c 100644 --- a/tools/gpio/.gitignore +++ b/tools/gpio/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only gpio-event-mon gpio-hammer lsgpio diff --git a/tools/iio/.gitignore b/tools/iio/.gitignore index 3758202618bd..5bd6f4df98b7 100644 --- a/tools/iio/.gitignore +++ b/tools/iio/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only iio_event_monitor iio_generic_buffer lsiio diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore index 9fc984e64386..f7f1296b96ae 100644 --- a/tools/laptop/dslm/.gitignore +++ b/tools/laptop/dslm/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only dslm diff --git a/tools/leds/.gitignore b/tools/leds/.gitignore index ac96d9f53dfc..06bd3ee1b7c9 100644 --- a/tools/leds/.gitignore +++ b/tools/leds/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only uledmon diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index e97c2ebcf447..8a81b3679d2b 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only libbpf_version.h libbpf.pc FEATURE-DUMP.libbpf diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore index cc0e7a9f99e3..6c308ac4388c 100644 --- a/tools/lib/lockdep/.gitignore +++ b/tools/lib/lockdep/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only liblockdep.so.* diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore index 9e9f25fb1922..7123c70b9ebc 100644 --- a/tools/lib/traceevent/.gitignore +++ b/tools/lib/traceevent/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only TRACEEVENT-CFLAGS libtraceevent-dynamic-list libtraceevent.so.* diff --git a/tools/memory-model/.gitignore b/tools/memory-model/.gitignore index b1d34c52f3c3..cf4cd66d8fbf 100644 --- a/tools/memory-model/.gitignore +++ b/tools/memory-model/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only litmus diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore index 6e2ddc54152f..c492a1ddad91 100644 --- a/tools/memory-model/litmus-tests/.gitignore +++ b/tools/memory-model/litmus-tests/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only *.litmus.out diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore index 914cff12899b..45cefda24c7b 100644 --- a/tools/objtool/.gitignore +++ b/tools/objtool/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only arch/x86/lib/inat-tables.c objtool fixdep diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore index 53d081336757..94cb97b77f06 100644 --- a/tools/pcmcia/.gitignore +++ b/tools/pcmcia/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only crc32hash diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index bf1252dc2cb0..f3f84781fd74 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore index 8cc30e731c73..d053b325f728 100644 --- a/tools/perf/tests/.gitignore +++ b/tools/perf/tests/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only llvm-src-base.c llvm-src-kbuild.c llvm-src-prologue.c diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore index f698a0e5bfa6..0b319fc8bb17 100644 --- a/tools/power/acpi/.gitignore +++ b/tools/power/acpi/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only /acpidbg /acpidump /ec diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 1f9977cc609c..7677329c42a6 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only .libs libcpupower.so libcpupower.so.* diff --git a/tools/power/x86/intel-speed-select/.gitignore b/tools/power/x86/intel-speed-select/.gitignore index f61145925ce9..a814f89fe75f 100644 --- a/tools/power/x86/intel-speed-select/.gitignore +++ b/tools/power/x86/intel-speed-select/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only include/ intel-speed-select diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore index 7521370d3568..e13109b43cd1 100644 --- a/tools/power/x86/turbostat/.gitignore +++ b/tools/power/x86/turbostat/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only turbostat diff --git a/tools/spi/.gitignore b/tools/spi/.gitignore index 4280576397e8..14ddba3d2195 100644 --- a/tools/spi/.gitignore +++ b/tools/spi/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only spidev_fdx spidev_test diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore index c791ff59a37a..1c63e31f7edf 100644 --- a/tools/testing/kunit/.gitignore +++ b/tools/testing/kunit/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] \ No newline at end of file diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index 3834899b6693..d971516401e6 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only generated/map-shift.h idr.c idr-test diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index 61df01cdf0b2..ac0505cef4ae 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only gpiogpio-event-mon gpiogpio-hammer gpioinclude/ diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore index 95e8f4561474..78eae9972bb1 100644 --- a/tools/testing/selftests/android/ion/.gitignore +++ b/tools/testing/selftests/android/ion/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ionapp_export ionapp_import ionmap_test diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index 3c5b4e8ff894..78c902045ca7 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only mangle_* fake_sigreturn_* !*.[ch] diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore index e8fae8d61ed6..f4f6c5112463 100644 --- a/tools/testing/selftests/arm64/tags/.gitignore +++ b/tools/testing/selftests/arm64/tags/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only tags_test diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index ec464859c6b6..e759d7eb1297 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only test_verifier test_maps test_lru_map diff --git a/tools/testing/selftests/bpf/map_tests/.gitignore b/tools/testing/selftests/bpf/map_tests/.gitignore index 45984a364647..89c4a3d37544 100644 --- a/tools/testing/selftests/bpf/map_tests/.gitignore +++ b/tools/testing/selftests/bpf/map_tests/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only tests.h diff --git a/tools/testing/selftests/bpf/prog_tests/.gitignore b/tools/testing/selftests/bpf/prog_tests/.gitignore index 45984a364647..89c4a3d37544 100644 --- a/tools/testing/selftests/bpf/prog_tests/.gitignore +++ b/tools/testing/selftests/bpf/prog_tests/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only tests.h diff --git a/tools/testing/selftests/bpf/verifier/.gitignore b/tools/testing/selftests/bpf/verifier/.gitignore index 45984a364647..89c4a3d37544 100644 --- a/tools/testing/selftests/bpf/verifier/.gitignore +++ b/tools/testing/selftests/bpf/verifier/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only tests.h diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore index a23bb4a6f06c..def2e97dab9a 100644 --- a/tools/testing/selftests/breakpoints/.gitignore +++ b/tools/testing/selftests/breakpoints/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only breakpoint_test step_after_suspend_test diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore index b732dd0d4738..426d9adca67c 100644 --- a/tools/testing/selftests/capabilities/.gitignore +++ b/tools/testing/selftests/capabilities/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only test_execve validate_cap diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 7f9835624793..aa6de65b0838 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only test_memcontrol test_core test_freezer diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore index 0dc4f32c6cb8..a81085742d40 100644 --- a/tools/testing/selftests/clone3/.gitignore +++ b/tools/testing/selftests/clone3/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only clone3 clone3_clear_sighand clone3_set_tid diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore index f6aebcc27b76..ca74f2e1c719 100644 --- a/tools/testing/selftests/drivers/.gitignore +++ b/tools/testing/selftests/drivers/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /dma-buf/udmabuf diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore index 33618493562b..807407f7f58b 100644 --- a/tools/testing/selftests/efivarfs/.gitignore +++ b/tools/testing/selftests/efivarfs/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only create-read open-unlink diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index b02279da6fa1..c078ece12ff0 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir* script* execveat diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 8449cf6716ce..f0c0ff20d6cf 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore index 8a5d9bf63dd4..8e5cf9084894 100644 --- a/tools/testing/selftests/filesystems/binderfs/.gitignore +++ b/tools/testing/selftests/filesystems/binderfs/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only binderfs_test diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore index 9ae8db44ec14..9090157258b1 100644 --- a/tools/testing/selftests/filesystems/epoll/.gitignore +++ b/tools/testing/selftests/filesystems/epoll/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only epoll_wakeup_test diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore index 98d8a5a63049..2659417cb2c7 100644 --- a/tools/testing/selftests/ftrace/.gitignore +++ b/tools/testing/selftests/ftrace/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only logs diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index a09f57061902..0efcd494daab 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only futex_requeue_pi futex_requeue_pi_mismatched_ops futex_requeue_pi_signal_restart diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore index 7d14f743d1a4..4c69408f3e84 100644 --- a/tools/testing/selftests/gpio/.gitignore +++ b/tools/testing/selftests/gpio/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only gpio-mockup-chardev diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore index ab806edc8732..e962fb2a08d5 100644 --- a/tools/testing/selftests/ia64/.gitignore +++ b/tools/testing/selftests/ia64/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only aliasing-test diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore index 3bfcbae5fa13..862de222a3f3 100644 --- a/tools/testing/selftests/intel_pstate/.gitignore +++ b/tools/testing/selftests/intel_pstate/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only aperf msr diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore index 9af04c9353c0..9ed280e4c704 100644 --- a/tools/testing/selftests/ipc/.gitignore +++ b/tools/testing/selftests/ipc/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only msgque_test msgque diff --git a/tools/testing/selftests/ir/.gitignore b/tools/testing/selftests/ir/.gitignore index 070ea0c75fb8..0bbada8c1811 100644 --- a/tools/testing/selftests/ir/.gitignore +++ b/tools/testing/selftests/ir/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only ir_loopback diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore index 5a9b3732b2de..38ccdfe80ef7 100644 --- a/tools/testing/selftests/kcmp/.gitignore +++ b/tools/testing/selftests/kcmp/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only kcmp_test kcmp-test-file diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 30072c3f52fb..2f85dc944fbd 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only /s390x/sync_regs_test /s390x/memop /x86_64/cr4_cpuid_sync_test diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore index 8745eba39012..da438e780ffe 100644 --- a/tools/testing/selftests/media_tests/.gitignore +++ b/tools/testing/selftests/media_tests/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only media_device_test media_device_open video_device_test diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore index f2f7ec0a99b4..f2fbba178601 100644 --- a/tools/testing/selftests/membarrier/.gitignore +++ b/tools/testing/selftests/membarrier/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only membarrier_test_multi_thread membarrier_test_single_thread diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore index afe87c40ac80..dd9a051f608e 100644 --- a/tools/testing/selftests/memfd/.gitignore +++ b/tools/testing/selftests/memfd/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only fuse_mnt fuse_test memfd_test diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore index 856ad4107eb3..0bc64a6d4c18 100644 --- a/tools/testing/selftests/mount/.gitignore +++ b/tools/testing/selftests/mount/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only unprivileged-remount-test diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore index d8d42377205a..72ad8ca691c9 100644 --- a/tools/testing/selftests/mqueue/.gitignore +++ b/tools/testing/selftests/mqueue/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ecc52d4c034d..08ad1a7109e2 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only msg_zerocopy socket psock_fanout diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore index a793eef5b876..2dea317f12e7 100644 --- a/tools/testing/selftests/net/forwarding/.gitignore +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only forwarding.config diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index d72f07642738..beea6541fb21 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only mptcp_connect *.pcap diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore index d9355035e746..f4f031db8bbf 100644 --- a/tools/testing/selftests/networking/timestamping/.gitignore +++ b/tools/testing/selftests/networking/timestamping/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only timestamping rxtimestamp txtimestamp diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore index 2ab2c824ce86..ed79ebdf286e 100644 --- a/tools/testing/selftests/nsfs/.gitignore +++ b/tools/testing/selftests/nsfs/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only owner pidns diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/openat2/.gitignore index bd68f6c3fd07..82a4846cbc4b 100644 --- a/tools/testing/selftests/openat2/.gitignore +++ b/tools/testing/selftests/openat2/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /*_test diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 39559d723c41..2d4db5afb142 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only pidfd_open_test pidfd_poll_test pidfd_test diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore index 6d4fd014511c..28bc6ca13cc6 100644 --- a/tools/testing/selftests/powerpc/alignment/.gitignore +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only copy_first_unaligned alignment_handler diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore index 9161679b1e1a..c9ce13983c99 100644 --- a/tools/testing/selftests/powerpc/benchmarks/.gitignore +++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only gettimeofday context_switch fork diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore index ec1848434be5..b385eee3012c 100644 --- a/tools/testing/selftests/powerpc/cache_shape/.gitignore +++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only cache_shape diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 12ef5b031974..ddaf140b8255 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore index b585c6c1564a..1d08b15af697 100644 --- a/tools/testing/selftests/powerpc/dscr/.gitignore +++ b/tools/testing/selftests/powerpc/dscr/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only dscr_default_test dscr_explicit_test dscr_inherit_exec_test diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index 50ded63e25b7..e31ca6f453ed 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only fpu_syscall vmx_syscall fpu_preempt diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 0ebeaea22641..7cf7ad261d02 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only hugetlb_vs_thp_test subpage_prot tempfile diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore index e748f336eed3..ff7896903d7b 100644 --- a/tools/testing/selftests/powerpc/pmu/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only count_instructions l3_bank_test per_event_excludes diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore index 42bddbed8b64..2920fb39439b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only reg_access_test event_attributes_test cycles_test diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore index 4cc4e31bed1d..1e5c04e24254 100644 --- a/tools/testing/selftests/powerpc/primitives/.gitignore +++ b/tools/testing/selftests/powerpc/primitives/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only load_unaligned_zeropad diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index dce19f221c46..0e96150b7c7e 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 0b969fba3beb..f795e06f5ae3 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only rfi_flush diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index dca5852a1546..f897b55a44dd 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only signal signal_tm sigfuz diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore index 31a17e0ba884..b0dfc74aa57e 100644 --- a/tools/testing/selftests/powerpc/stringloops/.gitignore +++ b/tools/testing/selftests/powerpc/stringloops/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only memcmp_64 memcmp_32 strlen diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore index 89e762eab676..30e962cf84d1 100644 --- a/tools/testing/selftests/powerpc/switch_endian/.gitignore +++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only switch_endian_test check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore index f0f3fcc9d802..b00cab225476 100644 --- a/tools/testing/selftests/powerpc/syscalls/.gitignore +++ b/tools/testing/selftests/powerpc/syscalls/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only ipc_unmuxed diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 98f2708d86cc..7baf2a46002f 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only tm-resched-dscr tm-syscall tm-signal-msr-resv diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore index 7c04395010cb..b744aedfd1f2 100644 --- a/tools/testing/selftests/powerpc/vphn/.gitignore +++ b/tools/testing/selftests/powerpc/vphn/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only test-vphn diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore index 0b5c27447bf6..91af2b631bc9 100644 --- a/tools/testing/selftests/prctl/.gitignore +++ b/tools/testing/selftests/prctl/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 66fab4c58ed4..4bca5a9327a4 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only /fd-001-lookup /fd-002-posix-eq /fd-003-kthread diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore index 5a4a26e5464b..9938fb406389 100644 --- a/tools/testing/selftests/pstore/.gitignore +++ b/tools/testing/selftests/pstore/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only logs *uuid diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore index f562e49d6917..534ca26eee48 100644 --- a/tools/testing/selftests/ptp/.gitignore +++ b/tools/testing/selftests/ptp/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only testptp diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index cfcc49a7def7..7bebf9534a86 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only get_syscall_info peeksiginfo diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore index ccc240275d1c..f6cbce77460b 100644 --- a/tools/testing/selftests/rcutorture/.gitignore +++ b/tools/testing/selftests/rcutorture/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only initrd b[0-9]* res diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore index 712a3d41a325..24e27957efcc 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only srcu.c diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore index 1d016e66980a..57d296341304 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only srcu.h diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore index f47cb2045f13..d65462d64816 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only *.out diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index cc610da7e369..5910888ebfe1 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only basic_percpu_ops_test basic_test basic_rseq_op_test diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore index d0ad44f6294a..fb2d533aa575 100644 --- a/tools/testing/selftests/rtc/.gitignore +++ b/tools/testing/selftests/rtc/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only rtctest setdate diff --git a/tools/testing/selftests/safesetid/.gitignore b/tools/testing/selftests/safesetid/.gitignore index 9c1a629bca01..25d3db172907 100644 --- a/tools/testing/selftests/safesetid/.gitignore +++ b/tools/testing/selftests/safesetid/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only safesetid-test diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore index 5af29d3a1b0a..dec678577f9c 100644 --- a/tools/testing/selftests/seccomp/.gitignore +++ b/tools/testing/selftests/seccomp/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only seccomp_bpf seccomp_benchmark diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore index 35897b0a3f44..50a19a8888ce 100644 --- a/tools/testing/selftests/sigaltstack/.gitignore +++ b/tools/testing/selftests/sigaltstack/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only sas diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore index 189b7818de34..923e18eed1a0 100644 --- a/tools/testing/selftests/size/.gitignore +++ b/tools/testing/selftests/size/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only get_size diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore index 90e835ed74e6..0331f77373b5 100644 --- a/tools/testing/selftests/sparc64/drivers/.gitignore +++ b/tools/testing/selftests/sparc64/drivers/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only adi-test diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore index 1e23fefd68e8..d5a2da428752 100644 --- a/tools/testing/selftests/splice/.gitignore +++ b/tools/testing/selftests/splice/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only default_file_splice_read diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore index f5091e7792f2..f1152357712f 100644 --- a/tools/testing/selftests/sync/.gitignore +++ b/tools/testing/selftests/sync/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only sync_test diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index c26d72e0166f..d52f65de23b4 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only __pycache__/ *.pyc plugins/ diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index 789f21e81028..2e43851b47c1 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only clock_nanosleep exec gettime_perf diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore index 32a9eadb2d4e..bb5326ff900b 100644 --- a/tools/testing/selftests/timers/.gitignore +++ b/tools/testing/selftests/timers/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only alarmtimer-suspend change_skew clocksource-switch diff --git a/tools/testing/selftests/tmpfs/.gitignore b/tools/testing/selftests/tmpfs/.gitignore index a96838fad74d..b1afaa925905 100644 --- a/tools/testing/selftests/tmpfs/.gitignore +++ b/tools/testing/selftests/tmpfs/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /bug-link-o-tmpfile diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 133bf9ee986c..382cfb39a1a3 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso_test vdso_standalone_test_x86 diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 31b3c98b6d34..876c48d7f41d 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore index 5aac51575c7e..61d7b89cdbca 100644 --- a/tools/testing/selftests/watchdog/.gitignore +++ b/tools/testing/selftests/watchdog/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only watchdog-test diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore index 415b542a9d59..bfa15e6feb2f 100644 --- a/tools/testing/selftests/wireguard/qemu/.gitignore +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only build/ distfiles/ diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore index 7757f73ff9a3..022a1f3b64ef 100644 --- a/tools/testing/selftests/x86/.gitignore +++ b/tools/testing/selftests/x86/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *_32 *_64 single_step_syscall diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index 7f7a2ccc30c4..87ca2731cff9 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.d vsock_test vsock_diag_test diff --git a/tools/thermal/tmon/.gitignore b/tools/thermal/tmon/.gitignore index 06e96be65276..d9e97a0308f5 100644 --- a/tools/thermal/tmon/.gitignore +++ b/tools/thermal/tmon/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /tmon diff --git a/tools/usb/.gitignore b/tools/usb/.gitignore index 1b7448981435..fce1ef5a9267 100644 --- a/tools/usb/.gitignore +++ b/tools/usb/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only ffs-test testusb diff --git a/tools/usb/usbip/.gitignore b/tools/usb/usbip/.gitignore index 03b892c8bd8c..597361a96dbb 100644 --- a/tools/usb/usbip/.gitignore +++ b/tools/usb/usbip/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only Makefile Makefile.in aclocal.m4 diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 1cfbb0157a46..075588c4da08 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only *.d virtio_test vringh_test diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore index 44f095fa2604..79bb92ae1bb3 100644 --- a/tools/vm/.gitignore +++ b/tools/vm/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only slabinfo page-types diff --git a/usr/.gitignore b/usr/.gitignore index 610de736b75e..935442ed1eb2 100644 --- a/usr/.gitignore +++ b/usr/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only gen_init_cpio initramfs_data.cpio /initramfs_inc_data diff --git a/usr/include/.gitignore b/usr/include/.gitignore index a0991ff4402b..d2fab782cb7d 100644 --- a/usr/include/.gitignore +++ b/usr/include/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only * !.gitignore !Makefile -- cgit v1.2.3 From 6952a4f646446fde9e190b62c5e45f84c6cf91aa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 8 Mar 2020 17:26:32 +0100 Subject: selftests: add pid namespace ENOMEM regression test We recently regressed (cf. [1] and its corresponding fix in [2]) returning ENOMEM when trying to create a process in a pid namespace whose init process/child subreaper has already died. This has caused confusion at least once before that (cf. [3]). Let's add a simple regression test to catch this in the future. [1]: 49cb2fc42ce4 ("fork: extend clone3() to support setting a PID") [2]: b26ebfe12f34 ("pid: Fix error return value in some cases") [3]: 35f71bc0a09a ("fork: report pid reservation failure properly") Cc: Corey Minyard Cc: Oleg Nesterov Cc: Adrian Reber Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Andrei Vagin Signed-off-by: Christian Brauner --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/pid_namespace/.gitignore | 1 + tools/testing/selftests/pid_namespace/Makefile | 8 ++++ tools/testing/selftests/pid_namespace/config | 2 + .../selftests/pid_namespace/regression_enomem.c | 45 ++++++++++++++++++++++ tools/testing/selftests/pidfd/pidfd.h | 2 + 7 files changed, 60 insertions(+) create mode 100644 tools/testing/selftests/pid_namespace/.gitignore create mode 100644 tools/testing/selftests/pid_namespace/Makefile create mode 100644 tools/testing/selftests/pid_namespace/config create mode 100644 tools/testing/selftests/pid_namespace/regression_enomem.c (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index a6fbdf354d34..8cc9e6a7c1fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13158,6 +13158,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git F: samples/pidfd/ F: tools/testing/selftests/pidfd/ +F: tools/testing/selftests/pid_namespace/ F: tools/testing/selftests/clone3/ K: (?i)pidfd K: (?i)clone3 diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6ec503912bea..5fc587b7136f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -38,6 +38,7 @@ TARGETS += netfilter TARGETS += networking/timestamping TARGETS += nsfs TARGETS += pidfd +TARGETS += pid_namespace TARGETS += powerpc TARGETS += proc TARGETS += pstore diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore new file mode 100644 index 000000000000..93ab9d7e5b7e --- /dev/null +++ b/tools/testing/selftests/pid_namespace/.gitignore @@ -0,0 +1 @@ +regression_enomem diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile new file mode 100644 index 000000000000..dcaefa224ca0 --- /dev/null +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -g -I../../../../usr/include/ + +TEST_GEN_PROGS := regression_enomem + +include ../lib.mk + +$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h diff --git a/tools/testing/selftests/pid_namespace/config b/tools/testing/selftests/pid_namespace/config new file mode 100644 index 000000000000..26cdb27e7dbb --- /dev/null +++ b/tools/testing/selftests/pid_namespace/config @@ -0,0 +1,2 @@ +CONFIG_PID_NS=y +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c new file mode 100644 index 000000000000..73d532556d17 --- /dev/null +++ b/tools/testing/selftests/pid_namespace/regression_enomem.c @@ -0,0 +1,45 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" +#include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" + +/* + * Regression test for: + * 35f71bc0a09a ("fork: report pid reservation failure properly") + * b26ebfe12f34 ("pid: Fix error return value in some cases") + */ +TEST(regression_enomem) +{ + pid_t pid; + + if (geteuid()) + EXPECT_EQ(0, unshare(CLONE_NEWUSER)); + + EXPECT_EQ(0, unshare(CLONE_NEWPID)); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(EXIT_SUCCESS); + + EXPECT_EQ(0, wait_for_pid(pid)); + + pid = fork(); + ASSERT_LT(pid, 0); + ASSERT_EQ(errno, ENOMEM); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index d482515604db..c1921a53dbed 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "../kselftest.h" -- cgit v1.2.3 From 2de4e82318c7f9d34f4b08599a612cd4cd10bf0b Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Fri, 20 Mar 2020 21:26:19 +0100 Subject: selftests/ptrace: add test cases for dead-locks This adds test cases for ptrace deadlocks. Additionally fixes a compile problem in get_syscall_info.c, observed with gcc-4.8.4: get_syscall_info.c: In function 'get_syscall_info': get_syscall_info.c:93:3: error: 'for' loop initial declarations are only allowed in C99 mode for (unsigned int i = 0; i < ARRAY_SIZE(args); ++i) { ^ get_syscall_info.c:93:3: note: use option -std=c99 or -std=gnu99 to compile your code Signed-off-by: Bernd Edlinger Reviewed-by: Kees Cook Signed-off-by: Eric W. Biederman --- tools/testing/selftests/ptrace/Makefile | 4 +- tools/testing/selftests/ptrace/vmaccess.c | 86 +++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/ptrace/vmaccess.c (limited to 'tools') diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index c0b7f89f0930..2f1f532c39db 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -iquote../../../../include/uapi -Wall +CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall -TEST_GEN_PROGS := get_syscall_info peeksiginfo +TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess include ../lib.mk diff --git a/tools/testing/selftests/ptrace/vmaccess.c b/tools/testing/selftests/ptrace/vmaccess.c new file mode 100644 index 000000000000..4db327b44586 --- /dev/null +++ b/tools/testing/selftests/ptrace/vmaccess.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2020 Bernd Edlinger + * All rights reserved. + * + * Check whether /proc/$pid/mem can be accessed without causing deadlocks + * when de_thread is blocked with ->cred_guard_mutex held. + */ + +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include + +static void *thread(void *arg) +{ + ptrace(PTRACE_TRACEME, 0, 0L, 0L); + return NULL; +} + +TEST(vmaccess) +{ + int f, pid = fork(); + char mm[64]; + + if (!pid) { + pthread_t pt; + + pthread_create(&pt, NULL, thread, NULL); + pthread_join(pt, NULL); + execlp("true", "true", NULL); + } + + sleep(1); + sprintf(mm, "/proc/%d/mem", pid); + f = open(mm, O_RDONLY); + ASSERT_GE(f, 0); + close(f); + f = kill(pid, SIGCONT); + ASSERT_EQ(f, 0); +} + +TEST(attach) +{ + int s, k, pid = fork(); + + if (!pid) { + pthread_t pt; + + pthread_create(&pt, NULL, thread, NULL); + pthread_join(pt, NULL); + execlp("sleep", "sleep", "2", NULL); + } + + sleep(1); + k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); + ASSERT_EQ(errno, EAGAIN); + ASSERT_EQ(k, -1); + k = waitpid(-1, &s, WNOHANG); + ASSERT_NE(k, -1); + ASSERT_NE(k, 0); + ASSERT_NE(k, pid); + ASSERT_EQ(WIFEXITED(s), 1); + ASSERT_EQ(WEXITSTATUS(s), 0); + sleep(1); + k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); + ASSERT_EQ(k, 0); + k = waitpid(-1, &s, 0); + ASSERT_EQ(k, pid); + ASSERT_EQ(WIFSTOPPED(s), 1); + ASSERT_EQ(WSTOPSIG(s), SIGSTOP); + k = ptrace(PTRACE_DETACH, pid, 0L, 0L); + ASSERT_EQ(k, 0); + k = waitpid(-1, &s, 0); + ASSERT_EQ(k, pid); + ASSERT_EQ(WIFEXITED(s), 1); + ASSERT_EQ(WEXITSTATUS(s), 0); + k = waitpid(-1, NULL, 0); + ASSERT_EQ(k, -1); + ASSERT_EQ(errno, ECHILD); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From a92e92d1a749e9bae9828f34f632d56ac2c6d2c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Mar 2020 18:07:44 +0100 Subject: objtool: Introduce validate_return() Trivial 'cleanup' to save one indentation level and match validate_call(). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160923.963996225@infradead.org --- tools/objtool/check.c | 64 +++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6b6178ef4464..da17b5acc504 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1975,6 +1975,41 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st return validate_call(insn, state); } +static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state) +{ + if (state->uaccess && !func_uaccess_safe(func)) { + WARN_FUNC("return with UACCESS enabled", + insn->sec, insn->offset); + return 1; + } + + if (!state->uaccess && func_uaccess_safe(func)) { + WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", + insn->sec, insn->offset); + return 1; + } + + if (state->df) { + WARN_FUNC("return with DF set", + insn->sec, insn->offset); + return 1; + } + + if (func && has_modified_stack_frame(state)) { + WARN_FUNC("return with modified stack frame", + insn->sec, insn->offset); + return 1; + } + + if (state->bp_scratch) { + WARN("%s uses BP as a scratch register", + func->name); + return 1; + } + + return 0; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -2090,34 +2125,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (state.uaccess && !func_uaccess_safe(func)) { - WARN_FUNC("return with UACCESS enabled", sec, insn->offset); - return 1; - } - - if (!state.uaccess && func_uaccess_safe(func)) { - WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", sec, insn->offset); - return 1; - } - - if (state.df) { - WARN_FUNC("return with DF set", sec, insn->offset); - return 1; - } - - if (func && has_modified_stack_frame(&state)) { - WARN_FUNC("return with modified stack frame", - sec, insn->offset); - return 1; - } - - if (state.bp_scratch) { - WARN("%s uses BP as a scratch register", - func->name); - return 1; - } - - return 0; + return validate_return(func, insn, &state); case INSN_CALL: case INSN_CALL_DYNAMIC: -- cgit v1.2.3 From dbf4aeb0a494020644507459c2446e632cba1a05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Mar 2020 18:24:59 +0100 Subject: objtool: Rename func_for_each_insn() There is func_for_each_insn() and func_for_each_insn_all(), the both iterate the instructions, but the first uses symbol offset/length while the second uses insn->func. Rename func_for_each_insn() to sym_for_eac_insn() because it iterates on symbol information. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.024341229@infradead.org --- tools/objtool/check.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index da17b5acc504..564ea1dcc85e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -77,17 +77,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, insn; \ insn = next_insn_same_func(file, insn)) -#define func_for_each_insn(file, func, insn) \ - for (insn = find_insn(file, func->sec, func->offset); \ +#define sym_for_each_insn(file, sym, insn) \ + for (insn = find_insn(file, sym->sec, sym->offset); \ insn && &insn->list != &file->insn_list && \ - insn->sec == func->sec && \ - insn->offset < func->offset + func->len; \ + insn->sec == sym->sec && \ + insn->offset < sym->offset + sym->len; \ insn = list_next_entry(insn, list)) -#define func_for_each_insn_continue_reverse(file, func, insn) \ +#define sym_for_each_insn_continue_reverse(file, sym, insn) \ for (insn = list_prev_entry(insn, list); \ &insn->list != &file->insn_list && \ - insn->sec == func->sec && insn->offset >= func->offset; \ + insn->sec == sym->sec && insn->offset >= sym->offset; \ insn = list_prev_entry(insn, list)) #define sec_for_each_insn_from(file, insn) \ @@ -286,7 +286,7 @@ static int decode_instructions(struct objtool_file *file) return -1; } - func_for_each_insn(file, func, insn) + sym_for_each_insn(file, func, insn) insn->func = func; } } @@ -2064,7 +2064,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, func, i) { + sym_for_each_insn_continue_reverse(file, func, i) { if (i->save) { save_insn = i; break; -- cgit v1.2.3 From f0f70adb78108a0cbc321a07133cd78ea4f84699 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Mar 2020 18:27:24 +0100 Subject: objtool: Rename func_for_each_insn_all() Now that func_for_each_insn() is available, rename func_for_each_insn_all(). This gets us: sym_for_each_insn() - iterate on symbol offset/len func_for_each_insn() - iterate on insn->func Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.083720147@infradead.org --- tools/objtool/check.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 564ea1dcc85e..43f7d3c2e8b2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -72,7 +72,7 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, return find_insn(file, func->cfunc->sec, func->cfunc->offset); } -#define func_for_each_insn_all(file, func, insn) \ +#define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn; \ insn = next_insn_same_func(file, insn)) @@ -170,7 +170,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, if (!insn->func) return false; - func_for_each_insn_all(file, func, insn) { + func_for_each_insn(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) @@ -185,7 +185,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, * case, the function's dead-end status depends on whether the target * of the sibling call returns. */ - func_for_each_insn_all(file, func, insn) { + func_for_each_insn(file, func, insn) { if (is_sibling_call(insn)) { struct instruction *dest = insn->jump_dest; @@ -430,7 +430,7 @@ static void add_ignores(struct objtool_file *file) continue; } - func_for_each_insn_all(file, func, insn) + func_for_each_insn(file, func, insn) insn->ignore = true; } } @@ -1122,7 +1122,7 @@ static void mark_func_jump_tables(struct objtool_file *file, struct instruction *insn, *last = NULL; struct rela *rela; - func_for_each_insn_all(file, func, insn) { + func_for_each_insn(file, func, insn) { if (!last) last = insn; @@ -1157,7 +1157,7 @@ static int add_func_jump_tables(struct objtool_file *file, struct instruction *insn; int ret; - func_for_each_insn_all(file, func, insn) { + func_for_each_insn(file, func, insn) { if (!insn->jump_table) continue; -- cgit v1.2.3 From 65fb11a7f6aeae678043738d06248a4e21f4e4e4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Mar 2020 18:39:45 +0100 Subject: objtool: Optimize find_symbol_by_index() The symbol index is object wide, not per section, so it makes no sense to have the symbol_hash be part of the section object. By moving it to the elf object we avoid the linear sections iteration. This reduces the runtime of objtool on vmlinux.o from over 3 hours (I gave up) to a few minutes. The defconfig vmlinux.o has around 20k sections. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.261852348@infradead.org --- tools/objtool/elf.c | 13 +++++-------- tools/objtool/elf.h | 3 +-- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index cc4601c879ce..b188b3e075be 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -46,13 +46,11 @@ static struct section *find_section_by_index(struct elf *elf, static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) { - struct section *sec; struct symbol *sym; - list_for_each_entry(sec, &elf->sections, list) - hash_for_each_possible(sec->symbol_hash, sym, hash, idx) - if (sym->idx == idx) - return sym; + hash_for_each_possible(elf->symbol_hash, sym, hash, idx) + if (sym->idx == idx) + return sym; return NULL; } @@ -166,7 +164,6 @@ static int read_sections(struct elf *elf) INIT_LIST_HEAD(&sec->symbol_list); INIT_LIST_HEAD(&sec->rela_list); hash_init(sec->rela_hash); - hash_init(sec->symbol_hash); list_add_tail(&sec->list, &elf->sections); @@ -299,7 +296,7 @@ static int read_symbols(struct elf *elf) } sym->alias = alias; list_add(&sym->list, entry); - hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); + hash_add(elf->symbol_hash, &sym->hash, sym->idx); } /* Create parent/child links for any cold subfunctions */ @@ -425,6 +422,7 @@ struct elf *elf_read(const char *name, int flags) } memset(elf, 0, sizeof(*elf)); + hash_init(elf->symbol_hash); INIT_LIST_HEAD(&elf->sections); elf->fd = open(name, flags); @@ -486,7 +484,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, INIT_LIST_HEAD(&sec->symbol_list); INIT_LIST_HEAD(&sec->rela_list); hash_init(sec->rela_hash); - hash_init(sec->symbol_hash); list_add_tail(&sec->list, &elf->sections); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index a1963259b930..12229801b13f 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -27,7 +27,6 @@ struct section { struct list_head list; GElf_Shdr sh; struct list_head symbol_list; - DECLARE_HASHTABLE(symbol_hash, 8); struct list_head rela_list; DECLARE_HASHTABLE(rela_hash, 16); struct section *base, *rela; @@ -71,7 +70,7 @@ struct elf { int fd; char *name; struct list_head sections; - DECLARE_HASHTABLE(rela_hash, 16); + DECLARE_HASHTABLE(symbol_hash, 20); }; -- cgit v1.2.3 From 1e11f3fdc326d7466e43185ea943b6156143387c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 09:26:29 +0100 Subject: objtool: Add a statistics mode Have it print a few numbers which can be used to size the hashtables. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.321381240@infradead.org --- tools/objtool/builtin-check.c | 3 ++- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 5 +++++ tools/objtool/elf.c | 18 +++++++++++++++++- 4 files changed, 25 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c807984a03c1..10fbe75ab43d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -17,7 +17,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats; static const char * const check_usage[] = { "objtool check [] file.o", @@ -31,6 +31,7 @@ const struct option check_options[] = { OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), + OPT_BOOLEAN('s', "stats", &stats, "print statistics"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index a32736f8d2a4..0b907902ee79 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -8,7 +8,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43f7d3c2e8b2..6df1bae2f961 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -239,6 +239,7 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; + unsigned long nr_insns = 0; int ret; for_each_sec(file, sec) { @@ -274,6 +275,7 @@ static int decode_instructions(struct objtool_file *file) hash_add(file->insn_hash, &insn->hash, insn->offset); list_add_tail(&insn->list, &file->insn_list); + nr_insns++; } list_for_each_entry(func, &sec->symbol_list, list) { @@ -291,6 +293,9 @@ static int decode_instructions(struct objtool_file *file) } } + if (stats) + printf("nr_insns: %lu\n", nr_insns); + return 0; err: diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index b188b3e075be..ff293064b469 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -15,6 +15,7 @@ #include #include #include +#include "builtin.h" #include "elf.h" #include "warn.h" @@ -202,6 +203,9 @@ static int read_sections(struct elf *elf) sec->len = sec->sh.sh_size; } + if (stats) + printf("nr_sections: %lu\n", (unsigned long)sections_nr); + /* sanity check, one more call to elf_nextscn() should return NULL */ if (elf_nextscn(elf->elf, s)) { WARN("section entry mismatch"); @@ -299,6 +303,9 @@ static int read_symbols(struct elf *elf) hash_add(elf->symbol_hash, &sym->hash, sym->idx); } + if (stats) + printf("nr_symbols: %lu\n", (unsigned long)symbols_nr); + /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { list_for_each_entry(sym, &sec->symbol_list, list) { @@ -360,6 +367,7 @@ static int read_relas(struct elf *elf) struct rela *rela; int i; unsigned int symndx; + unsigned long nr_rela, max_rela = 0, tot_rela = 0; list_for_each_entry(sec, &elf->sections, list) { if (sec->sh.sh_type != SHT_RELA) @@ -374,6 +382,7 @@ static int read_relas(struct elf *elf) sec->base->rela = sec; + nr_rela = 0; for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) { rela = malloc(sizeof(*rela)); if (!rela) { @@ -401,8 +410,15 @@ static int read_relas(struct elf *elf) list_add_tail(&rela->list, &sec->rela_list); hash_add(sec->rela_hash, &rela->hash, rela->offset); - + nr_rela++; } + max_rela = max(max_rela, nr_rela); + tot_rela += nr_rela; + } + + if (stats) { + printf("max_rela: %lu\n", max_rela); + printf("tot_rela: %lu\n", tot_rela); } return 0; -- cgit v1.2.3 From 530389968739883a61192767e1c215653ba4ba2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Mar 2020 18:43:35 +0100 Subject: objtool: Optimize find_section_by_index() In order to avoid a linear search (over 20k entries), add an section_hash to the elf object. This reduces objtool on vmlinux.o from a few minutes to around 45 seconds. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.381249993@infradead.org --- tools/objtool/elf.c | 13 ++++++++----- tools/objtool/elf.h | 2 ++ 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index ff293064b469..900771365ae3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -38,7 +38,7 @@ static struct section *find_section_by_index(struct elf *elf, { struct section *sec; - list_for_each_entry(sec, &elf->sections, list) + hash_for_each_possible(elf->section_hash, sec, hash, idx) if (sec->idx == idx) return sec; @@ -166,8 +166,6 @@ static int read_sections(struct elf *elf) INIT_LIST_HEAD(&sec->rela_list); hash_init(sec->rela_hash); - list_add_tail(&sec->list, &elf->sections); - s = elf_getscn(elf->elf, i); if (!s) { WARN_ELF("elf_getscn"); @@ -201,6 +199,9 @@ static int read_sections(struct elf *elf) } } sec->len = sec->sh.sh_size; + + list_add_tail(&sec->list, &elf->sections); + hash_add(elf->section_hash, &sec->hash, sec->idx); } if (stats) @@ -439,6 +440,7 @@ struct elf *elf_read(const char *name, int flags) memset(elf, 0, sizeof(*elf)); hash_init(elf->symbol_hash); + hash_init(elf->section_hash); INIT_LIST_HEAD(&elf->sections); elf->fd = open(name, flags); @@ -501,8 +503,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, INIT_LIST_HEAD(&sec->rela_list); hash_init(sec->rela_hash); - list_add_tail(&sec->list, &elf->sections); - s = elf_newscn(elf->elf); if (!s) { WARN_ELF("elf_newscn"); @@ -579,6 +579,9 @@ struct section *elf_create_section(struct elf *elf, const char *name, shstrtab->len += strlen(name) + 1; shstrtab->changed = true; + list_add_tail(&sec->list, &elf->sections); + hash_add(elf->section_hash, &sec->hash, sec->idx); + return sec; } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 12229801b13f..8c272eb515c8 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -25,6 +25,7 @@ struct section { struct list_head list; + struct hlist_node hash; GElf_Shdr sh; struct list_head symbol_list; struct list_head rela_list; @@ -71,6 +72,7 @@ struct elf { char *name; struct list_head sections; DECLARE_HASHTABLE(symbol_hash, 20); + DECLARE_HASHTABLE(section_hash, 16); }; -- cgit v1.2.3 From ae358196fac3a0b4d2a7d47a4f401e3421027b03 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 09:32:10 +0100 Subject: objtool: Optimize find_section_by_name() In order to avoid yet another linear search of (20k) sections, add a name based hash. This reduces objtool runtime on vmlinux.o by some 10s to around 35s. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.440174280@infradead.org --- tools/objtool/elf.c | 10 +++++++++- tools/objtool/elf.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 900771365ae3..20fe40d5101d 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -22,11 +22,16 @@ #define MAX_NAME_LEN 128 +static inline u32 str_hash(const char *str) +{ + return jhash(str, strlen(str), 0); +} + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; - list_for_each_entry(sec, &elf->sections, list) + hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name)) if (!strcmp(sec->name, name)) return sec; @@ -202,6 +207,7 @@ static int read_sections(struct elf *elf) list_add_tail(&sec->list, &elf->sections); hash_add(elf->section_hash, &sec->hash, sec->idx); + hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); } if (stats) @@ -441,6 +447,7 @@ struct elf *elf_read(const char *name, int flags) hash_init(elf->symbol_hash); hash_init(elf->section_hash); + hash_init(elf->section_name_hash); INIT_LIST_HEAD(&elf->sections); elf->fd = open(name, flags); @@ -581,6 +588,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, list_add_tail(&sec->list, &elf->sections); hash_add(elf->section_hash, &sec->hash, sec->idx); + hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); return sec; } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 8c272eb515c8..ac7c46f7d9ab 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef LIBELF_USE_DEPRECATED # define elf_getshdrnum elf_getshnum @@ -26,6 +27,7 @@ struct section { struct list_head list; struct hlist_node hash; + struct hlist_node name_hash; GElf_Shdr sh; struct list_head symbol_list; struct list_head rela_list; @@ -73,6 +75,7 @@ struct elf { struct list_head sections; DECLARE_HASHTABLE(symbol_hash, 20); DECLARE_HASHTABLE(section_hash, 16); + DECLARE_HASHTABLE(section_name_hash, 16); }; -- cgit v1.2.3 From 2a362ecc3ec9632aeea4b9a9062db91b2bd9975a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 09:34:42 +0100 Subject: objtool: Optimize find_symbol_*() and read_symbols() All of: read_symbols(), find_symbol_by_offset(), find_symbol_containing(), find_containing_func() do a linear search of the symbols. Add an RB tree to make it go faster. This about halves objtool runtime on vmlinux.o, from 34s to 18s. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.499016559@infradead.org --- tools/objtool/Build | 5 ++ tools/objtool/elf.c | 194 ++++++++++++++++++++++++++++++++++++---------------- tools/objtool/elf.h | 3 + 3 files changed, 144 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/objtool/Build b/tools/objtool/Build index 8dc4f0848362..66f44f5cd2a6 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -11,6 +11,7 @@ objtool-y += objtool.o objtool-y += libstring.o objtool-y += libctype.o objtool-y += str_error_r.o +objtool-y += librbtree.o CFLAGS += -I$(srctree)/tools/lib @@ -25,3 +26,7 @@ $(OUTPUT)libctype.o: ../lib/ctype.c FORCE $(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)librbtree.o: ../lib/rbtree.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 20fe40d5101d..3a8b42672655 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -27,6 +27,90 @@ static inline u32 str_hash(const char *str) return jhash(str, strlen(str), 0); } +static void rb_add(struct rb_root *tree, struct rb_node *node, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + + while (*link) { + parent = *link; + if (cmp(node, parent) < 0) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node(node, parent, link); + rb_insert_color(node, tree); +} + +static struct rb_node *rb_find_first(struct rb_root *tree, const void *key, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + struct rb_node *match = NULL; + + while (node) { + int c = cmp(key, node); + if (c <= 0) { + if (!c) + match = node; + node = node->rb_left; + } else if (c > 0) { + node = node->rb_right; + } + } + + return match; +} + +static struct rb_node *rb_next_match(struct rb_node *node, const void *key, + int (*cmp)(const void *key, const struct rb_node *)) +{ + node = rb_next(node); + if (node && cmp(key, node)) + node = NULL; + return node; +} + +#define rb_for_each(tree, node, key, cmp) \ + for ((node) = rb_find_first((tree), (key), (cmp)); \ + (node); (node) = rb_next_match((node), (key), (cmp))) + +static int symbol_to_offset(struct rb_node *a, const struct rb_node *b) +{ + struct symbol *sa = rb_entry(a, struct symbol, node); + struct symbol *sb = rb_entry(b, struct symbol, node); + + if (sa->offset < sb->offset) + return -1; + if (sa->offset > sb->offset) + return 1; + + if (sa->len < sb->len) + return -1; + if (sa->len > sb->len) + return 1; + + sa->alias = sb; + + return 0; +} + +static int symbol_by_offset(const void *key, const struct rb_node *node) +{ + const struct symbol *s = rb_entry(node, struct symbol, node); + const unsigned long *o = key; + + if (*o < s->offset) + return -1; + if (*o > s->offset + s->len) + return 1; + + return 0; +} + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -63,47 +147,69 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) { - struct symbol *sym; + struct rb_node *node; - list_for_each_entry(sym, &sec->symbol_list, list) - if (sym->type != STT_SECTION && sym->offset == offset) - return sym; + rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + struct symbol *s = rb_entry(node, struct symbol, node); + + if (s->offset == offset && s->type != STT_SECTION) + return s; + } return NULL; } struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) { - struct symbol *sym; + struct rb_node *node; - list_for_each_entry(sym, &sec->symbol_list, list) - if (sym->type == STT_FUNC && sym->offset == offset) - return sym; + rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + struct symbol *s = rb_entry(node, struct symbol, node); + + if (s->offset == offset && s->type == STT_FUNC) + return s; + } return NULL; } -struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) { - struct section *sec; - struct symbol *sym; + struct rb_node *node; - list_for_each_entry(sec, &elf->sections, list) - list_for_each_entry(sym, &sec->symbol_list, list) - if (!strcmp(sym->name, name)) - return sym; + rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + struct symbol *s = rb_entry(node, struct symbol, node); + + if (s->type != STT_SECTION) + return s; + } return NULL; } -struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +struct symbol *find_containing_func(struct section *sec, unsigned long offset) +{ + struct rb_node *node; + + rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) { + struct symbol *s = rb_entry(node, struct symbol, node); + + if (s->type == STT_FUNC) + return s; + } + + return NULL; +} + +struct symbol *find_symbol_by_name(struct elf *elf, const char *name) { + struct section *sec; struct symbol *sym; - list_for_each_entry(sym, &sec->symbol_list, list) - if (sym->type != STT_SECTION && - offset >= sym->offset && offset < sym->offset + sym->len) - return sym; + list_for_each_entry(sec, &elf->sections, list) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strcmp(sym->name, name)) + return sym; return NULL; } @@ -130,18 +236,6 @@ struct rela *find_rela_by_dest(struct section *sec, unsigned long offset) return find_rela_by_dest_range(sec, offset, 1); } -struct symbol *find_containing_func(struct section *sec, unsigned long offset) -{ - struct symbol *func; - - list_for_each_entry(func, &sec->symbol_list, list) - if (func->type == STT_FUNC && offset >= func->offset && - offset < func->offset + func->len) - return func; - - return NULL; -} - static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; @@ -225,8 +319,9 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { struct section *symtab, *sec; - struct symbol *sym, *pfunc, *alias; - struct list_head *entry, *tmp; + struct symbol *sym, *pfunc; + struct list_head *entry; + struct rb_node *pnode; int symbols_nr, i; char *coldstr; @@ -245,7 +340,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); - alias = sym; + sym->alias = sym; sym->idx = i; @@ -283,29 +378,12 @@ static int read_symbols(struct elf *elf) sym->offset = sym->sym.st_value; sym->len = sym->sym.st_size; - /* sorted insert into a per-section list */ - entry = &sym->sec->symbol_list; - list_for_each_prev(tmp, &sym->sec->symbol_list) { - struct symbol *s; - - s = list_entry(tmp, struct symbol, list); - - if (sym->offset > s->offset) { - entry = tmp; - break; - } - - if (sym->offset == s->offset) { - if (sym->len && sym->len == s->len && alias == sym) - alias = s; - - if (sym->len >= s->len) { - entry = tmp; - break; - } - } - } - sym->alias = alias; + rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); + pnode = rb_prev(&sym->node); + if (pnode) + entry = &rb_entry(pnode, struct symbol, node)->list; + else + entry = &sym->sec->symbol_list; list_add(&sym->list, entry); hash_add(elf->symbol_hash, &sym->hash, sym->idx); } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index ac7c46f7d9ab..e4a8d68f2ef7 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #ifdef LIBELF_USE_DEPRECATED @@ -29,6 +30,7 @@ struct section { struct hlist_node hash; struct hlist_node name_hash; GElf_Shdr sh; + struct rb_root symbol_tree; struct list_head symbol_list; struct list_head rela_list; DECLARE_HASHTABLE(rela_hash, 16); @@ -43,6 +45,7 @@ struct section { struct symbol { struct list_head list; + struct rb_node node; struct hlist_node hash; GElf_Sym sym; struct section *sec; -- cgit v1.2.3 From 53d20720bbc8718ef86fdfe53dec0accfb593ef8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Mar 2020 10:36:53 +0100 Subject: objtool: Rename find_containing_func() For consistency; we have: find_symbol_by_offset() / find_symbol_containing() find_func_by_offset() / find_containing_func() fix that. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.558470724@infradead.org --- tools/objtool/elf.c | 2 +- tools/objtool/elf.h | 2 +- tools/objtool/warn.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3a8b42672655..07db4df18588 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -187,7 +187,7 @@ struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) return NULL; } -struct symbol *find_containing_func(struct section *sec, unsigned long offset) +struct symbol *find_func_containing(struct section *sec, unsigned long offset) { struct rb_node *node; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index e4a8d68f2ef7..d18f466829b0 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -91,7 +91,7 @@ struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); -struct symbol *find_containing_func(struct section *sec, unsigned long offset); +struct symbol *find_func_containing(struct section *sec, unsigned long offset); struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); struct section *elf_create_rela_section(struct elf *elf, struct section *base); diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h index cbb0a02b7480..7799f60de80a 100644 --- a/tools/objtool/warn.h +++ b/tools/objtool/warn.h @@ -21,7 +21,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) char *name, *str; unsigned long name_off; - func = find_containing_func(sec, offset); + func = find_func_containing(sec, offset); if (func) { name = func->name; name_off = offset - func->offset; -- cgit v1.2.3 From 513b5ca6b5fbeb766999fb1a4bc9a55c9fb7c9ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 10:20:46 +0100 Subject: objtool: Resize insn_hash Perf shows we're spending a lot of time in find_insn() and the statistics show we have around 3.2 million instruction. Increase the hash table size to reduce the bucket load from around 50 to 3. This shaves about 2s off of objtool on vmlinux.o runtime, down to 16s. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.617882545@infradead.org --- tools/objtool/check.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 6d875ca6fce0..f0ce8ffe7135 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -50,7 +50,7 @@ struct instruction { struct objtool_file { struct elf *elf; struct list_head insn_list; - DECLARE_HASHTABLE(insn_hash, 16); + DECLARE_HASHTABLE(insn_hash, 20); bool ignore_unreachables, c_file, hints, rodata; }; -- cgit v1.2.3 From cdb3d057a17d56363a831e486ea39e4c389a6cf9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 10:17:38 +0100 Subject: objtool: Optimize find_symbol_by_name() Perf showed that find_symbol_by_name() takes time; add a symbol name hash. This shaves another second off of objtool on vmlinux.o runtime, down to 15 seconds. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.676865656@infradead.org --- tools/objtool/elf.c | 10 +++++----- tools/objtool/elf.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 07db4df18588..43abae763b3b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -203,13 +203,11 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset) struct symbol *find_symbol_by_name(struct elf *elf, const char *name) { - struct section *sec; struct symbol *sym; - list_for_each_entry(sec, &elf->sections, list) - list_for_each_entry(sym, &sec->symbol_list, list) - if (!strcmp(sym->name, name)) - return sym; + hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name)) + if (!strcmp(sym->name, name)) + return sym; return NULL; } @@ -386,6 +384,7 @@ static int read_symbols(struct elf *elf) entry = &sym->sec->symbol_list; list_add(&sym->list, entry); hash_add(elf->symbol_hash, &sym->hash, sym->idx); + hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); } if (stats) @@ -524,6 +523,7 @@ struct elf *elf_read(const char *name, int flags) memset(elf, 0, sizeof(*elf)); hash_init(elf->symbol_hash); + hash_init(elf->symbol_name_hash); hash_init(elf->section_hash); hash_init(elf->section_name_hash); INIT_LIST_HEAD(&elf->sections); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index d18f466829b0..3088d925d640 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -47,6 +47,7 @@ struct symbol { struct list_head list; struct rb_node node; struct hlist_node hash; + struct hlist_node name_hash; GElf_Sym sym; struct section *sec; char *name; @@ -77,6 +78,7 @@ struct elf { char *name; struct list_head sections; DECLARE_HASHTABLE(symbol_hash, 20); + DECLARE_HASHTABLE(symbol_name_hash, 20); DECLARE_HASHTABLE(section_hash, 16); DECLARE_HASHTABLE(section_name_hash, 16); }; -- cgit v1.2.3 From 8b5fa6bc326bf02f293b5a39a8f5b3de816265d3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 11:23:36 +0100 Subject: objtool: Optimize read_sections() Perf showed that __hash_init() is a significant portion of read_sections(), so instead of doing a per section rela_hash, use an elf-wide rela_hash. Statistics show us there are about 1.1 million relas, so size it accordingly. This reduces the objtool on vmlinux.o runtime to a third, from 15 to 5 seconds. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.739153726@infradead.org --- tools/objtool/check.c | 18 +++++++++--------- tools/objtool/elf.c | 24 ++++++++++++++---------- tools/objtool/elf.h | 21 +++++++++++++++++---- tools/objtool/orc_gen.c | 9 +++++---- tools/objtool/special.c | 4 ++-- 5 files changed, 47 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6df1bae2f961..54a604381a03 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -569,8 +569,8 @@ static int add_jump_destinations(struct objtool_file *file) if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET) continue; - rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); + rela = find_rela_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); if (!rela) { dest_sec = insn->sec; dest_off = insn->offset + insn->len + insn->immediate; @@ -666,8 +666,8 @@ static int add_call_destinations(struct objtool_file *file) if (insn->type != INSN_CALL) continue; - rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); + rela = find_rela_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); if (!rela) { dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_func_by_offset(insn->sec, dest_off); @@ -796,7 +796,7 @@ static int handle_group_alt(struct objtool_file *file, */ if ((insn->offset != special_alt->new_off || (insn->type != INSN_CALL && !is_static_jump(insn))) && - find_rela_by_dest_range(insn->sec, insn->offset, insn->len)) { + find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) { WARN_FUNC("unsupported relocation in alternatives section", insn->sec, insn->offset); @@ -1066,8 +1066,8 @@ static struct rela *find_jump_table(struct objtool_file *file, break; /* look for a relocation which references .rodata */ - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); + text_rela = find_rela_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); if (!text_rela || text_rela->sym->type != STT_SECTION || !text_rela->sym->sec->rodata) continue; @@ -1096,7 +1096,7 @@ static struct rela *find_jump_table(struct objtool_file *file, * should reference text in the same function as the original * instruction. */ - table_rela = find_rela_by_dest(table_sec, table_offset); + table_rela = find_rela_by_dest(file->elf, table_sec, table_offset); if (!table_rela) continue; dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend); @@ -1232,7 +1232,7 @@ static int read_unwind_hints(struct objtool_file *file) for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { hint = (struct unwind_hint *)sec->data->d_buf + i; - rela = find_rela_by_dest(sec, i * sizeof(*hint)); + rela = find_rela_by_dest(file->elf, sec, i * sizeof(*hint)); if (!rela) { WARN("can't find rela for unwind_hints[%d]", i); return -1; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 43abae763b3b..8a0a1bc18cd7 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -212,8 +212,8 @@ struct symbol *find_symbol_by_name(struct elf *elf, const char *name) return NULL; } -struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, - unsigned int len) +struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len) { struct rela *rela; unsigned long o; @@ -221,17 +221,22 @@ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, if (!sec->rela) return NULL; - for (o = offset; o < offset + len; o++) - hash_for_each_possible(sec->rela->rela_hash, rela, hash, o) - if (rela->offset == o) + sec = sec->rela; + + for (o = offset; o < offset + len; o++) { + hash_for_each_possible(elf->rela_hash, rela, hash, + sec_offset_hash(sec, o)) { + if (rela->sec == sec && rela->offset == o) return rela; + } + } return NULL; } -struct rela *find_rela_by_dest(struct section *sec, unsigned long offset) +struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset) { - return find_rela_by_dest_range(sec, offset, 1); + return find_rela_by_dest_range(elf, sec, offset, 1); } static int read_sections(struct elf *elf) @@ -261,7 +266,6 @@ static int read_sections(struct elf *elf) INIT_LIST_HEAD(&sec->symbol_list); INIT_LIST_HEAD(&sec->rela_list); - hash_init(sec->rela_hash); s = elf_getscn(elf->elf, i); if (!s) { @@ -493,7 +497,7 @@ static int read_relas(struct elf *elf) } list_add_tail(&rela->list, &sec->rela_list); - hash_add(sec->rela_hash, &rela->hash, rela->offset); + hash_add(elf->rela_hash, &rela->hash, rela_hash(rela)); nr_rela++; } max_rela = max(max_rela, nr_rela); @@ -526,6 +530,7 @@ struct elf *elf_read(const char *name, int flags) hash_init(elf->symbol_name_hash); hash_init(elf->section_hash); hash_init(elf->section_name_hash); + hash_init(elf->rela_hash); INIT_LIST_HEAD(&elf->sections); elf->fd = open(name, flags); @@ -586,7 +591,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, INIT_LIST_HEAD(&sec->symbol_list); INIT_LIST_HEAD(&sec->rela_list); - hash_init(sec->rela_hash); s = elf_newscn(elf->elf); if (!s) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 3088d925d640..dfd2431ef693 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -33,7 +33,6 @@ struct section { struct rb_root symbol_tree; struct list_head symbol_list; struct list_head rela_list; - DECLARE_HASHTABLE(rela_hash, 16); struct section *base, *rela; struct symbol *sym; Elf_Data *data; @@ -81,8 +80,22 @@ struct elf { DECLARE_HASHTABLE(symbol_name_hash, 20); DECLARE_HASHTABLE(section_hash, 16); DECLARE_HASHTABLE(section_name_hash, 16); + DECLARE_HASHTABLE(rela_hash, 20); }; +static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) +{ + u32 ol = offset, oh = offset >> 32, idx = sec->idx; + + __jhash_mix(ol, oh, idx); + + return ol; +} + +static inline u32 rela_hash(struct rela *rela) +{ + return sec_offset_hash(rela->sec, rela->offset); +} struct elf *elf_read(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); @@ -90,9 +103,9 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); -struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); -struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, - unsigned int len); +struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 27a4112848c2..41e4a2754da4 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -81,7 +81,7 @@ int create_orc(struct objtool_file *file) return 0; } -static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, +static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec, unsigned int idx, struct section *insn_sec, unsigned long insn_off, struct orc_entry *o) { @@ -109,9 +109,10 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, rela->addend = insn_off; rela->type = R_X86_64_PC32; rela->offset = idx * sizeof(int); + rela->sec = ip_relasec; list_add_tail(&rela->list, &ip_relasec->rela_list); - hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset); + hash_add(elf->rela_hash, &rela->hash, rela_hash(rela)); return 0; } @@ -182,7 +183,7 @@ int create_orc_sections(struct objtool_file *file) if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, sizeof(struct orc_entry))) { - if (create_orc_entry(u_sec, ip_relasec, idx, + if (create_orc_entry(file->elf, u_sec, ip_relasec, idx, insn->sec, insn->offset, &insn->orc)) return -1; @@ -194,7 +195,7 @@ int create_orc_sections(struct objtool_file *file) /* section terminator */ if (prev_insn) { - if (create_orc_entry(u_sec, ip_relasec, idx, + if (create_orc_entry(file->elf, u_sec, ip_relasec, idx, prev_insn->sec, prev_insn->offset + prev_insn->len, &empty)) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index fdbaa611146d..e74e0189de22 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -118,7 +118,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, } } - orig_rela = find_rela_by_dest(sec, offset + entry->orig); + orig_rela = find_rela_by_dest(elf, sec, offset + entry->orig); if (!orig_rela) { WARN_FUNC("can't find orig rela", sec, offset + entry->orig); return -1; @@ -133,7 +133,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, alt->orig_off = orig_rela->addend; if (!entry->group || alt->new_len) { - new_rela = find_rela_by_dest(sec, offset + entry->new); + new_rela = find_rela_by_dest(elf, sec, offset + entry->new); if (!new_rela) { WARN_FUNC("can't find new rela", sec, offset + entry->new); -- cgit v1.2.3 From 8887a86eddd93ca396ca35f7b41fb14ed412f85d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Mar 2020 23:07:42 +0100 Subject: objtool: Delete cleanup() Perf shows we spend a measurable amount of time spend cleaning up right before we exit anyway. Avoid the needsless work and just terminate. This reduces objtool on vmlinux.o runtime from 5.4s to 4.8s Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.800720170@infradead.org --- tools/objtool/check.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 54a604381a03..0c9c9ad47467 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2458,23 +2458,6 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; } -static void cleanup(struct objtool_file *file) -{ - struct instruction *insn, *tmpinsn; - struct alternative *alt, *tmpalt; - - list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { - list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { - list_del(&alt->list); - free(alt); - } - list_del(&insn->list); - hash_del(&insn->hash); - free(insn); - } - elf_close(file->elf); -} - static struct objtool_file file; int check(const char *_objname, bool orc) @@ -2542,8 +2525,6 @@ int check(const char *_objname, bool orc) } out: - cleanup(&file); - if (ret < 0) { /* * Fatal error. The binary is corrupt or otherwise broken in -- cgit v1.2.3 From 74b873e49d92f90deb41d1a2a8fbb70328aebd67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Mar 2020 11:30:50 +0100 Subject: objtool: Optimize find_rela_by_dest_range() Perf shows there is significant time in find_rela_by_dest(); this is because we have to iterate the address space per byte, looking for relocation entries. Optimize this by reducing the address space granularity. This reduces objtool on vmlinux.o runtime from 4.8 to 4.4 seconds. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.861321325@infradead.org --- tools/objtool/elf.c | 15 +++++++++++---- tools/objtool/elf.h | 16 +++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8a0a1bc18cd7..09ddc8f1def3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -215,7 +215,7 @@ struct symbol *find_symbol_by_name(struct elf *elf, const char *name) struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len) { - struct rela *rela; + struct rela *rela, *r = NULL; unsigned long o; if (!sec->rela) @@ -223,12 +223,19 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, sec = sec->rela; - for (o = offset; o < offset + len; o++) { + for_offset_range(o, offset, offset + len) { hash_for_each_possible(elf->rela_hash, rela, hash, sec_offset_hash(sec, o)) { - if (rela->sec == sec && rela->offset == o) - return rela; + if (rela->sec != sec) + continue; + + if (rela->offset >= offset && rela->offset < offset + len) { + if (!r || rela->offset < r->offset) + r = rela; + } } + if (r) + return r; } return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index dfd2431ef693..ebbb10c61e24 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -83,9 +83,23 @@ struct elf { DECLARE_HASHTABLE(rela_hash, 20); }; +#define OFFSET_STRIDE_BITS 4 +#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) +#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1)) + +#define for_offset_range(_offset, _start, _end) \ + for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ + _offset <= ((_end) & OFFSET_STRIDE_MASK); \ + _offset += OFFSET_STRIDE) + static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) { - u32 ol = offset, oh = offset >> 32, idx = sec->idx; + u32 ol, oh, idx = sec->idx; + + offset &= OFFSET_STRIDE_MASK; + + ol = offset; + oh = offset >> 32; __jhash_mix(ol, oh, idx); -- cgit v1.2.3 From 350994bf95414d6da67a72f27d7ac3832ce3725d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2020 20:57:13 +0100 Subject: objtool: Re-arrange validate_functions() In preparation to adding a vmlinux.o specific pass, rearrange some code. No functional changes intended. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200324160924.924304616@infradead.org --- tools/objtool/check.c | 52 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0c9c9ad47467..0bfcb390ca73 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2395,9 +2395,8 @@ static bool ignore_unreachable_insn(struct instruction *insn) return false; } -static int validate_functions(struct objtool_file *file) +static int validate_section(struct objtool_file *file, struct section *sec) { - struct section *sec; struct symbol *func; struct instruction *insn; struct insn_state state; @@ -2410,36 +2409,45 @@ static int validate_functions(struct objtool_file *file) CFI_NUM_REGS * sizeof(struct cfi_reg)); state.stack_size = initial_func_cfi.cfa.offset; - for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; - if (!func->len) { - WARN("%s() is missing an ELF size annotation", - func->name); - warnings++; - } + if (!func->len) { + WARN("%s() is missing an ELF size annotation", + func->name); + warnings++; + } - if (func->pfunc != func || func->alias != func) - continue; + if (func->pfunc != func || func->alias != func) + continue; - insn = find_insn(file, sec, func->offset); - if (!insn || insn->ignore || insn->visited) - continue; + insn = find_insn(file, sec, func->offset); + if (!insn || insn->ignore || insn->visited) + continue; - state.uaccess = func->uaccess_safe; + state.uaccess = func->uaccess_safe; - ret = validate_branch(file, func, insn, state); - if (ret && backtrace) - BT_FUNC("<=== (func)", insn); - warnings += ret; - } + ret = validate_branch(file, func, insn, state); + if (ret && backtrace) + BT_FUNC("<=== (func)", insn); + warnings += ret; } return warnings; } +static int validate_functions(struct objtool_file *file) +{ + struct section *sec; + int warnings = 0; + + for_each_sec(file, sec) + warnings += validate_section(file, sec); + + return warnings; +} + static int validate_reachable_instructions(struct objtool_file *file) { struct instruction *insn; -- cgit v1.2.3 From 97752c39bdaeea990260d3111250f6c861519808 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 23 Mar 2020 19:43:33 -0700 Subject: kunit: kunit_tool: Allow .kunitconfig to disable config items Rework kunit_tool in order to allow .kunitconfig files to better enforce that disabled items in .kunitconfig are disabled in the generated .config. Previously, kunit_tool simply enforced that any line present in .kunitconfig was also present in .config, but this could cause problems if a config option was disabled in .kunitconfig, but not listed in .config due to (for example) having disabled dependencies. To fix this, re-work the parser to track config names and values, and require values to match unless they are explicitly disabled with the "CONFIG_x is not set" comment (or by setting its value to 'n'). Those "disabled" values will pass validation if omitted from the .config, but not if they have a different value. Signed-off-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_config.py | 41 ++++++++++++++++++++++++++-------- tools/testing/kunit/kunit_tool_test.py | 22 +++++++++--------- 2 files changed, 43 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index ebf3942b23f5..e75063d603b5 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -9,16 +9,18 @@ import collections import re -CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$' -CONFIG_PATTERN = r'^CONFIG_\w+=\S+$' - -KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry']) +CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' +CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+)$' +KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value']) class KconfigEntry(KconfigEntryBase): def __str__(self) -> str: - return self.raw_entry + if self.value == 'n': + return r'# CONFIG_%s is not set' % (self.name) + else: + return r'CONFIG_%s=%s' % (self.name, self.value) class KconfigParseError(Exception): @@ -38,7 +40,17 @@ class Kconfig(object): self._entries.append(entry) def is_subset_of(self, other: 'Kconfig') -> bool: - return self.entries().issubset(other.entries()) + for a in self.entries(): + found = False + for b in other.entries(): + if a.name != b.name: + continue + if a.value != b.value: + return False + found = True + if a.value != 'n' and found == False: + return False + return True def write_to_file(self, path: str) -> None: with open(path, 'w') as f: @@ -54,9 +66,20 @@ class Kconfig(object): line = line.strip() if not line: continue - elif config_matcher.match(line) or is_not_set_matcher.match(line): - self._entries.append(KconfigEntry(line)) - elif line[0] == '#': + + match = config_matcher.match(line) + if match: + entry = KconfigEntry(match.group(1), match.group(2)) + self.add_entry(entry) + continue + + empty_match = is_not_set_matcher.match(line) + if empty_match: + entry = KconfigEntry(empty_match.group(1), 'n') + self.add_entry(entry) + continue + + if line[0] == '#': continue else: raise KconfigParseError('Failed to parse: ' + line) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index ce47e87b633a..984588d6ba95 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -37,7 +37,7 @@ class KconfigTest(unittest.TestCase): self.assertTrue(kconfig0.is_subset_of(kconfig0)) kconfig1 = kunit_config.Kconfig() - kconfig1.add_entry(kunit_config.KconfigEntry('CONFIG_TEST=y')) + kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y')) self.assertTrue(kconfig1.is_subset_of(kconfig1)) self.assertTrue(kconfig0.is_subset_of(kconfig1)) self.assertFalse(kconfig1.is_subset_of(kconfig0)) @@ -51,15 +51,15 @@ class KconfigTest(unittest.TestCase): expected_kconfig = kunit_config.Kconfig() expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_UML=y')) + kunit_config.KconfigEntry('UML', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_MMU=y')) + kunit_config.KconfigEntry('MMU', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_TEST=y')) + kunit_config.KconfigEntry('TEST', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y')) + kunit_config.KconfigEntry('EXAMPLE_TEST', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('# CONFIG_MK8 is not set')) + kunit_config.KconfigEntry('MK8', 'n')) self.assertEqual(kconfig.entries(), expected_kconfig.entries()) @@ -68,15 +68,15 @@ class KconfigTest(unittest.TestCase): expected_kconfig = kunit_config.Kconfig() expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_UML=y')) + kunit_config.KconfigEntry('UML', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_MMU=y')) + kunit_config.KconfigEntry('MMU', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_TEST=y')) + kunit_config.KconfigEntry('TEST', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y')) + kunit_config.KconfigEntry('EXAMPLE_TEST', 'y')) expected_kconfig.add_entry( - kunit_config.KconfigEntry('# CONFIG_MK8 is not set')) + kunit_config.KconfigEntry('MK8', 'n')) expected_kconfig.write_to_file(kconfig_path) -- cgit v1.2.3 From 919a23e9d6ccf8b30e6b4c7aa36f8904c99d2dd7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 25 Mar 2020 16:07:01 +0800 Subject: selftests/net: add missing tests to Makefile Find some tests are missed in Makefile by running: for file in $(ls *.sh); do grep -q $file Makefile || echo $file; done Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 287ae916ec0b..4c1bd03ffa1c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,9 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh +TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh +TEST_PROGS += route_localnet.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any -- cgit v1.2.3 From c085dbfb1cfcf74e2ef2ef435291e7e63f046d6a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 25 Mar 2020 16:41:01 +0800 Subject: selftests/net/forwarding: define libs as TEST_PROGS_EXTENDED The lib files should not be defined as TEST_PROGS, or we will run them in run_kselftest.sh. Also remove ethtool_lib.sh exec permission. Fixes: 81573b18f26d ("selftests/net/forwarding: add Makefile to install tests") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/Makefile | 31 +++++++++++----------- .../selftests/net/forwarding/ethtool_lib.sh | 0 2 files changed, 16 insertions(+), 15 deletions(-) mode change 100755 => 100644 tools/testing/selftests/net/forwarding/ethtool_lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 44616103508b..250fbb2d1625 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -5,11 +5,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ bridge_vlan_unaware.sh \ - devlink_lib.sh \ - ethtool_lib.sh \ ethtool.sh \ - fib_offload_lib.sh \ - forwarding.config.sample \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ @@ -21,8 +17,6 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ - ipip_lib.sh \ - lib.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -32,15 +26,11 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_changes.sh \ mirror_gre_flower.sh \ mirror_gre_lag_lacp.sh \ - mirror_gre_lib.sh \ mirror_gre_neigh.sh \ mirror_gre_nh.sh \ mirror_gre.sh \ - mirror_gre_topo_lib.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ - mirror_lib.sh \ - mirror_topo_lib.sh \ mirror_vlan.sh \ router_bridge.sh \ router_bridge_vlan.sh \ @@ -50,17 +40,12 @@ TEST_PROGS = bridge_igmp.sh \ router_multipath.sh \ router.sh \ router_vid_1.sh \ - sch_ets_core.sh \ sch_ets.sh \ - sch_ets_tests.sh \ - sch_tbf_core.sh \ - sch_tbf_etsprio.sh \ sch_tbf_ets.sh \ sch_tbf_prio.sh \ sch_tbf_root.sh \ tc_actions.sh \ tc_chains.sh \ - tc_common.sh \ tc_flower_router.sh \ tc_flower.sh \ tc_shblocks.sh \ @@ -72,4 +57,20 @@ TEST_PROGS = bridge_igmp.sh \ vxlan_bridge_1q.sh \ vxlan_symmetric.sh +TEST_PROGS_EXTENDED := devlink_lib.sh \ + ethtool_lib.sh \ + fib_offload_lib.sh \ + forwarding.config.sample \ + ipip_lib.sh \ + lib.sh \ + mirror_gre_lib.sh \ + mirror_gre_topo_lib.sh \ + mirror_lib.sh \ + mirror_topo_lib.sh \ + sch_ets_core.sh \ + sch_ets_tests.sh \ + sch_tbf_core.sh \ + sch_tbf_etsprio.sh \ + tc_common.sh + include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh old mode 100755 new mode 100644 -- cgit v1.2.3 From eadcaa3dfd706bbf46682c8b8b5979262443c3c3 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Wed, 25 Mar 2020 09:40:53 -0700 Subject: perf callchain: Update docs regarding kernel/user space unwinding The method of unwinding for kernel space is defined by the kernel config, not by the value of --call-graph. Improve the documentation to reflect this. Signed-off-by: Tony Jones Link: http://lore.kernel.org/lkml/20200325164053.10177-1-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 14 ++++++++------ tools/perf/Documentation/perf-record.txt | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 8ead55593984..f16d8a71d3f5 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -405,14 +405,16 @@ ui.*:: This option is only applied to TUI. call-graph.*:: - When sub-commands 'top' and 'report' work with -g/—-children - there're options in control of call-graph. + The following controls the handling of call-graphs (obtained via the + -g/--call-graph options). call-graph.record-mode:: - The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. - The value of 'dwarf' is effective only if perf detect needed library - (libunwind or a recent version of libdw). - 'lbr' only work for cpus that support it. + The mode for user space can be 'fp' (frame pointer), 'dwarf' + and 'lbr'. The value 'dwarf' is effective only if libunwind + (or a recent version of libdw) is present on the system; + the value 'lbr' only works for certain cpus. The method for + kernel space is controlled not by this option but by the + kernel config (CONFIG_UNWINDER_*). call-graph.dump-size:: The size of stack to dump in order to do post-unwinding. Default is 8192 (byte). diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7f4db7592467..b25e028458e2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -237,16 +237,22 @@ OPTIONS option and remains only for backward compatibility. See --event. -g:: - Enables call-graph (stack chain/backtrace) recording. + Enables call-graph (stack chain/backtrace) recording for both + kernel space and user space. --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. Default is "fp". + implies -g. Default is "fp" (for user space). - Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) or "lbr" - (Hardware Last Branch Record facility) as the method to collect - the information used to show the call graphs. + The unwinding method used for kernel space is dependent on the + unwinder used by the active kernel configuration, i.e + CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc) + + Any option specified here controls the method used for user space. + + Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI - + Call Frame Information) or "lbr" (Hardware Last Branch Record + facility). In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus -- cgit v1.2.3 From 9fc9aad99e5d654bd3ea48861bda57f03b118ca9 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 25 Mar 2020 12:36:55 +0100 Subject: libbpf: Remove unused parameter `def` to get_map_field_int Has been unused since commit ef99b02b23ef ("libbpf: capture value in BTF type info for BTF-defined map defs"). Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200325113655.19341-1-tklauser@distanz.ch --- tools/lib/bpf/libbpf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 085e41f9b68e..e9479ad9dd51 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1845,7 +1845,6 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) * type definition, while using only sizeof(void *) space in ELF data section. */ static bool get_map_field_int(const char *map_name, const struct btf *btf, - const struct btf_type *def, const struct btf_member *m, __u32 *res) { const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); @@ -1972,19 +1971,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return -EINVAL; } if (strcmp(name, "type") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.type)) return -EINVAL; pr_debug("map '%s': found type = %u.\n", map_name, map->def.type); } else if (strcmp(name, "max_entries") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.max_entries)) return -EINVAL; pr_debug("map '%s': found max_entries = %u.\n", map_name, map->def.max_entries); } else if (strcmp(name, "map_flags") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.map_flags)) return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", @@ -1992,8 +1991,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } else if (strcmp(name, "key_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) + if (!get_map_field_int(map_name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found key_size = %u.\n", map_name, sz); @@ -2035,8 +2033,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } else if (strcmp(name, "value_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) + if (!get_map_field_int(map_name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found value_size = %u.\n", map_name, sz); @@ -2079,8 +2076,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, __u32 val; int err; - if (!get_map_field_int(map_name, obj->btf, def, m, - &val)) + if (!get_map_field_int(map_name, obj->btf, m, &val)) return -EINVAL; pr_debug("map '%s': found pinning = %u.\n", map_name, val); -- cgit v1.2.3 From 8395f320b407509819cc112f61a1de05780c8cba Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 25 Mar 2020 12:55:21 -0700 Subject: libbpf: Don't allocate 16M for log buffer by default For each prog/btf load we allocate and free 16 megs of verifier buffer. On production systems it doesn't really make sense because the programs/btf have gone through extensive testing and (mostly) guaranteed to successfully load. Let's assume successful case by default and skip buffer allocation on the first try. If there is an error, start with BPF_LOG_BUF_SIZE and double it on each ENOSPC iteration. v3: * Return -ENOMEM when can't allocate log buffer (Andrii Nakryiko) v2: * Don't allocate the buffer at all on the first try (Andrii Nakryiko) Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200325195521.112210-1-sdf@google.com --- tools/lib/bpf/btf.c | 20 +++++++++++++++----- tools/lib/bpf/libbpf.c | 22 ++++++++++++++-------- 2 files changed, 29 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3d1c25fc97ae..bfef3d606b54 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -657,22 +657,32 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) int btf__load(struct btf *btf) { - __u32 log_buf_size = BPF_LOG_BUF_SIZE; + __u32 log_buf_size = 0; char *log_buf = NULL; int err = 0; if (btf->fd >= 0) return -EEXIST; - log_buf = malloc(log_buf_size); - if (!log_buf) - return -ENOMEM; +retry_load: + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; - *log_buf = 0; + *log_buf = 0; + } btf->fd = bpf_load_btf(btf->data, btf->data_size, log_buf, log_buf_size, false); if (btf->fd < 0) { + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + free(log_buf); + goto retry_load; + } + err = -errno; pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (*log_buf) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9479ad9dd51..62903302935e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4851,8 +4851,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; - int log_buf_size = BPF_LOG_BUF_SIZE; - char *log_buf; + size_t log_buf_size = 0; + char *log_buf = NULL; int btf_fd, ret; if (!insns || !insns_cnt) @@ -4892,22 +4892,28 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_flags = prog->prog_flags; retry_load: - log_buf = malloc(log_buf_size); - if (!log_buf) - pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + } ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); if (ret >= 0) { - if (load_attr.log_level) + if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); *pfd = ret; ret = 0; goto out; } - if (errno == ENOSPC) { - log_buf_size <<= 1; + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + free(log_buf); goto retry_load; } -- cgit v1.2.3 From aa131ed44ae1d76637f0dbec33cfcf9115af9bc3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 24 Mar 2020 10:40:14 -0700 Subject: bpf: Test_verifier, #70 error message updates for 32-bit right shift After changes to add update_reg_bounds after ALU ops and adding ALU32 bounds tracking the error message is changed in the 32-bit right shift tests. Test "#70/u bounds check after 32-bit right shift with 64-bit input FAIL" now fails with, Unexpected error message! EXP: R0 invalid mem access RES: func#0 @0 7: (b7) r1 = 2 8: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=invP2 R10=fp0 fp-8_w=mmmmmmmm 8: (67) r1 <<= 31 9: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=invP4294967296 R10=fp0 fp-8_w=mmmmmmmm 9: (74) w1 >>= 31 10: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=invP0 R10=fp0 fp-8_w=mmmmmmmm 10: (14) w1 -= 2 11: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=invP4294967294 R10=fp0 fp-8_w=mmmmmmmm 11: (0f) r0 += r1 math between map_value pointer and 4294967294 is not allowed And test "#70/p bounds check after 32-bit right shift with 64-bit input FAIL" now fails with, Unexpected error message! EXP: R0 invalid mem access RES: func#0 @0 7: (b7) r1 = 2 8: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=inv2 R10=fp0 fp-8_w=mmmmmmmm 8: (67) r1 <<= 31 9: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=inv4294967296 R10=fp0 fp-8_w=mmmmmmmm 9: (74) w1 >>= 31 10: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=inv0 R10=fp0 fp-8_w=mmmmmmmm 10: (14) w1 -= 2 11: R0_w=map_value(id=0,off=0,ks=8,vs=8,imm=0) R1_w=inv4294967294 R10=fp0 fp-8_w=mmmmmmmm 11: (0f) r0 += r1 last_idx 11 first_idx 0 regs=2 stack=0 before 10: (14) w1 -= 2 regs=2 stack=0 before 9: (74) w1 >>= 31 regs=2 stack=0 before 8: (67) r1 <<= 31 regs=2 stack=0 before 7: (b7) r1 = 2 math between map_value pointer and 4294967294 is not allowed Before this series we did not trip the "math between map_value pointer..." error because check_reg_sane_offset is never called in adjust_ptr_min_max_vals(). Instead we have a register state that looks like this at line 11*, 11: R0_w=map_value(id=0,off=0,ks=8,vs=8, smin_value=0,smax_value=0, umin_value=0,umax_value=0, var_off=(0x0; 0x0)) R1_w=invP(id=0, smin_value=0,smax_value=4294967295, umin_value=0,umax_value=4294967295, var_off=(0xfffffffe; 0x0)) R10=fp(id=0,off=0, smin_value=0,smax_value=0, umin_value=0,umax_value=0, var_off=(0x0; 0x0)) fp-8_w=mmmmmmmm 11: (0f) r0 += r1 In R1 'smin_val != smax_val' yet we have a tnum_const as seen by 'var_off(0xfffffffe; 0x0))' with a 0x0 mask. So we hit this check in adjust_ptr_min_max_vals() if ((known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val || umin_val > umax_val) { /* Taint dst register if offset had invalid bounds derived from * e.g. dead branches. */ __mark_reg_unknown(env, dst_reg); return 0; } So we don't throw an error here and instead only throw an error later in the verification when the memory access is made. The root cause in verifier without alu32 bounds tracking is having 'umin_value = 0' and 'umax_value = U64_MAX' from BPF_SUB which we set when 'umin_value < umax_val' here, if (dst_reg->umin_value < umax_val) { /* Overflow possible, we know nothing */ dst_reg->umin_value = 0; dst_reg->umax_value = U64_MAX; } else { ...} Later in adjust_calar_min_max_vals we previously did a coerce_reg_to_size() which will clamp the U64_MAX to U32_MAX by truncating to 32bits. But either way without a call to update_reg_bounds the less precise bounds tracking will fall out of the alu op verification. After latest changes we now exit adjust_scalar_min_max_vals with the more precise umin value, due to zero extension propogating bounds from alu32 bounds into alu64 bounds and then calling update_reg_bounds. This then causes the verifier to trigger an earlier error and we get the error in the output above. This patch updates tests to reflect new error message. * I have a local patch to print entire verifier state regardless if we believe it is a constant so we can get a full picture of the state. Usually if tnum_is_const() then bounds are also smin=smax, etc. but this is not always true and is a bit subtle. Being able to see these states helps understand dataflow imo. Let me know if we want something similar upstream. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158507161475.15666.3061518385241144063.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/verifier/bounds.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index d55f476f2237..7c9b659095e9 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -411,16 +411,14 @@ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), /* r1 = 0xffff'fffe (NOT 0!) */ BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), - /* computes OOB pointer */ + /* error on computing OOB pointer */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), /* exit */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", + .errstr = "math between map_value pointer and 4294967294 is not allowed", .result = REJECT, }, { -- cgit v1.2.3 From 4b547a869db980cd32936bd4ad8f6a33371419ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 20 Mar 2020 13:55:41 -0700 Subject: KVM: selftests: Fix cosmetic copy-paste error in vm_mem_region_move() Fix a copy-paste typo in a comment and error message. Signed-off-by: Sean Christopherson Message-Id: <20200320205546.2396-3-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 0cf98ad59e32..8a3523d4434f 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -764,7 +764,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) * Input Args: * vm - Virtual Machine * slot - Slot of the memory region to move - * flags - Starting guest physical address + * new_gpa - Starting guest physical address * * Output Args: None * @@ -784,7 +784,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" - "ret: %i errno: %i slot: %u flags: 0x%lx", + "ret: %i errno: %i slot: %u new_gpa: 0x%lx", ret, errno, slot, new_gpa); } -- cgit v1.2.3 From 5790921bc18b1eb5c0c61371e31114fd4c4b0154 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Tue, 4 Feb 2020 09:14:24 -0800 Subject: x86/insn: Add Control-flow Enforcement (CET) instructions to the opcode map Add the following CET instructions to the opcode map: INCSSP: Increment Shadow Stack pointer (SSP). RDSSP: Read SSP into a GPR. SAVEPREVSSP: Use "previous ssp" token at top of current Shadow Stack (SHSTK) to create a "restore token" on the previous (outgoing) SHSTK. RSTORSSP: Restore from a "restore token" to SSP. WRSS: Write to kernel-mode SHSTK (kernel-mode instruction). WRUSS: Write to user-mode SHSTK (kernel-mode instruction). SETSSBSY: Verify the "supervisor token" pointed by MSR_IA32_PL0_SSP, set the token busy, and set then Shadow Stack pointer(SSP) to the value of MSR_IA32_PL0_SSP. CLRSSBSY: Verify the "supervisor token" and clear its busy bit. ENDBR64/ENDBR32: Mark a valid 64/32 bit control transfer endpoint. Detailed information of CET instructions can be found in Intel Software Developer's Manual. Signed-off-by: Yu-cheng Yu Signed-off-by: Borislav Petkov Reviewed-by: Adrian Hunter Reviewed-by: Tony Luck Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200204171425.28073-2-yu-cheng.yu@intel.com --- arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ tools/arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH -- cgit v1.2.3 From 1032f32645f8a650edb0134d52fa085642d0a492 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Feb 2020 09:14:25 -0800 Subject: perf/tests: Add CET instructions to the new instructions test Add to the "x86 instruction decoder - new instructions" test the following instructions: incsspd incsspq rdsspd rdsspq saveprevssp rstorssp wrssd wrssq wrussd wrussq setssbsy clrssbsy endbr32 endbr64 And the notrack prefix for indirect calls and jumps. For information about the instructions, refer Intel Control-flow Enforcement Technology Specification May 2019 (334525-003). Signed-off-by: Adrian Hunter Signed-off-by: Yu-cheng Yu Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200204171425.28073-3-yu-cheng.yu@intel.com --- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 112 +++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-64.c | 196 ++++++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 236 +++++++++++++++++++++++++++ 3 files changed, 544 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index e6461abc9e7b..9708ae892061 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2085,6 +2085,118 @@ "67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "", "67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%eax)",}, +{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f ae 2d 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%eax)",}, +{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%eax)",}, +{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%eax)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcall *%eax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcall *(%eax)",}, +{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect", +"ff 15 78 56 34 12 \tcall *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd call *%eax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack call *%eax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack call *(%eax)",}, +{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd call *%eax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd call *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmp *%eax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmp *(%eax)",}, +{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect", +"ff 25 78 56 34 12 \tjmp *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmp *%eax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmp *(%eax)",}, +{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmp *%eax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmp *(%eax)",}, +{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmp *%eax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 567ecccfad7c..5da17d41d302 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2263,6 +2263,202 @@ "67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", "67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 41 0f ae e8 \tincsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 48 0f ae e8 \tincsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 49 0f ae e8 \tincsspq %r8",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%rax)",}, +{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "", +"41 0f ae 28 \txrstor (%r8)",}, +{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 41 0f 1e c8 \trdsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 48 0f 1e c8 \trdsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 49 0f 1e c8 \trdsspq %r8",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%rax)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "", +"f3 41 0f 01 28 \trstorssp (%r8)",}, +{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%rax)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"41 0f 38 f6 10 \twrssd %edx,(%r8)",}, +{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "", +"48 0f 38 f6 08 \twrssq %rcx,(%rax)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"49 0f 38 f6 10 \twrssq %rdx,(%r8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "", +"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"f3 41 0f ae 30 \tclrssbsy (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcallq *%rax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcallq *(%rax)",}, +{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect", +"41 ff 10 \tcallq *(%r8)",}, +{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 14 25 78 56 34 12 \tcallq *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd callq *%rax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd callq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"f2 41 ff 10 \tbnd callq *(%r8)",}, +{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack callq *%rax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack callq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e 41 ff 10 \tnotrack callq *(%r8)",}, +{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd callq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd callq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect", +"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect", +"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmpq *%rax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmpq *(%rax)",}, +{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"41 ff 20 \tjmpq *(%r8)",}, +{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff 24 25 78 56 34 12 \tjmpq *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmpq *%rax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmpq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"f2 41 ff 20 \tbnd jmpq *(%r8)",}, +{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmpq *%rax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmpq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e 41 ff 20 \tnotrack jmpq *(%r8)",}, +{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmpq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect", +"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", +"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index ddbf07c50bb8..c3808e94c46e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1771,6 +1771,145 @@ int main(void) asm volatile("enqcmds (%eax),%ebx"); asm volatile("enqcmds 0x12345678(%eax),%ecx"); + /* incsspd/q */ + + asm volatile("incsspd %eax"); + asm volatile("incsspd %r8d"); + asm volatile("incsspq %rax"); + asm volatile("incsspq %r8"); + /* Also check instructions in the same group encoding as incsspd/q */ + asm volatile("xrstor (%rax)"); + asm volatile("xrstor (%r8)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstor 0x12345678(%r8,%rcx,8)"); + asm volatile("lfence"); + + /* rdsspd/q */ + + asm volatile("rdsspd %eax"); + asm volatile("rdsspd %r8d"); + asm volatile("rdsspq %rax"); + asm volatile("rdsspq %r8"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%rax)"); + asm volatile("rstorssp (%r8)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%rax,%rcx,8)"); + asm volatile("rstorssp 0x12345678(%r8,%rcx,8)"); + + /* wrssd/q */ + + asm volatile("wrssd %ecx,(%rax)"); + asm volatile("wrssd %edx,(%r8)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrssq %rcx,(%rax)"); + asm volatile("wrssq %rdx,(%r8)"); + asm volatile("wrssq %rdx,(0x12345678)"); + asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)"); + + /* wrussd/q */ + + asm volatile("wrussd %ecx,(%rax)"); + asm volatile("wrussd %edx,(%r8)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrussq %rcx,(%rax)"); + asm volatile("wrussq %rdx,(%r8)"); + asm volatile("wrussq %rdx,(0x12345678)"); + asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%rax)"); + asm volatile("clrssbsy (%r8)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)"); + asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3434,6 +3573,103 @@ int main(void) asm volatile("enqcmds (%si),%bx"); asm volatile("enqcmds 0x1234(%si),%cx"); + /* incsspd */ + + asm volatile("incsspd %eax"); + /* Also check instructions in the same group encoding as incsspd */ + asm volatile("xrstor (%eax)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%eax,%ecx,8)"); + asm volatile("lfence"); + + /* rdsspd */ + + asm volatile("rdsspd %eax"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%eax)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%eax,%ecx,8)"); + + /* wrssd */ + + asm volatile("wrssd %ecx,(%eax)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)"); + + /* wrussd */ + + asm volatile("wrussd %ecx,(%eax)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%eax)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + #endif /* #ifndef __x86_64__ */ /* SGX */ -- cgit v1.2.3 From 2a3d252dffe14582f238e21b09923e3772263123 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Mar 2020 09:40:22 -0700 Subject: perf parse-events: Add defensive NULL check Terms may have a NULL config in which case a strcmp will SEGV. This can be reproduced with: perf stat -e '*/event=?,nr/' sleep 1 Add a NULL check to avoid this. This was caught by LLVM's libfuzzer. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200325164022.41385-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 616fbda7c3fc..ef6a63f3d386 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -984,12 +984,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term, struct parse_events_term *t; list_for_each_entry(t, head_terms, list) { - if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { - if (!strcmp(t->config, term->config)) { - t->used = true; - *value = t->val.num; - return 0; - } + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM && + t->config && !strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; } } -- cgit v1.2.3 From e4ffd066ff440a57097e9140fa9e16ceef905de8 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 14 Feb 2020 02:21:06 +0800 Subject: perf: Normalize gcc parameter when generating arch errno table The $(CC) passed to arch_errno_names.sh may include a series of parameters along with gcc itself. To avoid overwriting the following parameters of arch_errno_names.sh and break the build like below, we just pick up the first word of the $(CC). find: unknown predicate `-m64/arch' x86_64-wrs-linux-gcc: warning: '-x c' after last input file has no effect x86_64-wrs-linux-gcc: error: unrecognized command line option '-m64/include/uapi/asm-generic/errno.h' x86_64-wrs-linux-gcc: fatal error: no input files Signed-off-by: He Zhe Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1581618066-187262-2-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a02aca9b21f4..d15a311408f1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -574,7 +574,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh -- cgit v1.2.3 From 226657ba2389d9ac147e5ae21dfab0020d7a045e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 26 Mar 2020 16:01:13 +0200 Subject: selftests: forwarding: Add a forwarding test for pedit munge dsfield Add a test that runs packets with dsfield set, and test that pedit adjusts the DSCP or ECN parts or the whole field. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/pedit_dsfield.sh | 238 +++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/pedit_dsfield.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh new file mode 100755 index 000000000000..b50081855913 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ip_dsfield + test_ip_dscp + test_ip_ecn + test_ip_dscp_ecn +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +do_test_pedit_dsfield_common() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local mz_flags=$1; shift + + RET=0 + + # TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is + # overwritten when zero is selected. + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345 + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + log_test "$pedit_locus pedit $pedit_action" +} + +do_test_pedit_dsfield() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + local saddr=$1; shift + local daddr=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_ip_dsfield() +{ + local locus=$1; shift + local dsfield + + for dsfield in 0 1 2 3 128 252 253 254 255; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $dsfield" \ + ip "ip_tos $dsfield" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dsfield() +{ + do_test_ip_dsfield "dev $swp1 ingress" + do_test_ip_dsfield "dev $swp2 egress" +} + +do_test_ip_dscp() +{ + local locus=$1; shift + local dscp + + for dscp in 0 1 2 3 32 61 62 63; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $((dscp << 2)) retain 0xfc" \ + ip "ip_tos $(((dscp << 2) | 1))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dscp() +{ + do_test_ip_dscp "dev $swp1 ingress" + do_test_ip_dscp "dev $swp2 egress" +} + +do_test_ip_ecn() +{ + local locus=$1; shift + local ecn + + for ecn in 0 1 2 3; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $ecn retain 0x03" \ + ip "ip_tos $((124 | $ecn))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_ecn() +{ + do_test_ip_ecn "dev $swp1 ingress" + do_test_ip_ecn "dev $swp2 egress" +} + +do_test_ip_dscp_ecn() +{ + local locus=$1; shift + + tc filter add $locus handle 101 pref 1 \ + flower action pedit ex munge ip dsfield set 124 retain 0xfc \ + action pedit ex munge ip dsfield set 1 retain 0x03 + tc filter add dev $h2 ingress handle 101 pref 1 prot ip \ + flower skip_hw ip_tos 125 action pass + + do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \ + "-A 192.0.2.1 -B 192.0.2.2" + + tc filter del dev $h2 ingress pref 1 + tc filter del $locus pref 1 +} + +test_ip_dscp_ecn() +{ + do_test_ip_dscp_ecn "dev $swp1 ingress" + do_test_ip_dscp_ecn "dev $swp2 egress" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 9a9dffcb4ff9b2476dfeda36c4ddd35d7515f6e3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 26 Mar 2020 16:01:14 +0200 Subject: selftests: mlxsw: qos_dscp_router: Test no DSCP rewrite after pedit When DSCP is updated through an offloaded pedit action, DSCP rewrite on egress should be disabled. Add a test that check that it is so. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/qos_dscp_router.sh | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh index c745ce3befee..4cb2aa65278a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh @@ -31,6 +31,7 @@ ALL_TESTS=" ping_ipv4 test_update test_no_update + test_pedit_norewrite test_dscp_leftover " @@ -56,6 +57,11 @@ zero() echo 0 } +three() +{ + echo 3 +} + h1_create() { simple_if_init $h1 192.0.2.1/28 @@ -103,6 +109,9 @@ switch_create() simple_if_init $swp1 192.0.2.2/28 __simple_if_init $swp2 v$swp1 192.0.2.17/28 + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null lldpad_app_wait_set $swp1 @@ -115,6 +124,9 @@ switch_destroy() lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null lldpad_app_wait_del + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + __simple_if_fini $swp2 192.0.2.17/28 simple_if_fini $swp1 192.0.2.2/28 } @@ -223,18 +235,36 @@ __test_update() test_update() { + echo "Test net.ipv4.ip_forward_update_priority=1" __test_update 1 reprioritize } test_no_update() { + echo "Test net.ipv4.ip_forward_update_priority=0" __test_update 0 echo } +# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off. +test_pedit_norewrite() +{ + echo "Test no DSCP rewrite after DSCP is updated by pedit" + + tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \ + action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \ + action skbedit priority 3 + + __test_update 0 three + + tc filter del dev $swp1 ingress pref 1 +} + # Test that when the last APP rule is removed, the prio->DSCP map is properly # set to zeroes, and that the last APP rule does not stay active in the ASIC. test_dscp_leftover() { + echo "Test that last removed DSCP rule is deconfigured correctly" + lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null lldpad_app_wait_del -- cgit v1.2.3 From 2f6bdb05e0b6adbcd05fae2aad7a9b6760fc7af6 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Thu, 26 Mar 2020 09:20:07 -0700 Subject: tools/power/x86/intel_pstate_tracer: fix a broken y-axis scale A fixed y-axis scale was missed during a change to autoscale. Correct it. Fixes: 709bd70d070ee ("tools/power/x86/intel_pstate_tracer: change several graphs to autoscale y-axis") Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 256199c7a182..3c47865bb247 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -235,7 +235,6 @@ def plot_duration_cpu(): output_png = 'all_cpu_durations.png' g_plot = common_all_gnuplot_settings(output_png) # autoscale this one, no set y range - g_plot('set ytics 0, 500') g_plot('set ylabel "Timer Duration (MilliSeconds)"') g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now())) -- cgit v1.2.3 From c3bba690a2643245f59a4d5d66e6b687459696d9 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 26 Mar 2020 14:25:09 +0000 Subject: kunit: subtests should be indented 4 spaces according to TAP Introduce KUNIT_SUBTEST_INDENT macro which corresponds to 4-space indentation and KUNIT_SUBSUBTEST_INDENT macro which corresponds to 8-space indentation in line with TAP spec (e.g. see "Subtests" section of https://node-tap.org/tap-protocol/). Use these macros in place of one or two tabs in strings to clarify why we are indenting. Suggested-by: Frank Rowand Signed-off-by: Alan Maguire Reviewed-by: Frank Rowand Signed-off-by: Shuah Khan --- include/kunit/test.h | 11 +++++- lib/kunit/assert.c | 79 +++++++++++++++++++------------------ lib/kunit/test.c | 7 ++-- tools/testing/kunit/kunit_parser.py | 10 ++--- 4 files changed, 59 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/include/kunit/test.h b/include/kunit/test.h index f7b2ed4c127e..9b0c46a6ca1f 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -84,6 +84,14 @@ struct kunit; /* Size of log associated with test. */ #define KUNIT_LOG_SIZE 512 +/* + * TAP specifies subtest stream indentation of 4 spaces, 8 spaces for a + * sub-subtest. See the "Subtests" section in + * https://node-tap.org/tap-protocol/ + */ +#define KUNIT_SUBTEST_INDENT " " +#define KUNIT_SUBSUBTEST_INDENT " " + /** * struct kunit_case - represents an individual test case. * @@ -395,7 +403,8 @@ void kunit_log_append(char *log, const char *fmt, ...); } while (0) #define kunit_printk(lvl, test, fmt, ...) \ - kunit_log(lvl, test, "\t# %s: " fmt, (test)->name, ##__VA_ARGS__) + kunit_log(lvl, test, KUNIT_SUBTEST_INDENT "# %s: " fmt, \ + (test)->name, ##__VA_ARGS__) /** * kunit_info() - Prints an INFO level message associated with @test. diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index 02ecd0d7d80a..33acdaa28a7d 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -6,6 +6,7 @@ * Author: Brendan Higgins */ #include +#include #include "string-stream.h" @@ -53,12 +54,12 @@ void kunit_unary_assert_format(const struct kunit_assert *assert, kunit_base_assert_format(assert, stream); if (unary_assert->expected_true) string_stream_add(stream, - "\tExpected %s to be true, but is false\n", - unary_assert->condition); + KUNIT_SUBTEST_INDENT "Expected %s to be true, but is false\n", + unary_assert->condition); else string_stream_add(stream, - "\tExpected %s to be false, but is true\n", - unary_assert->condition); + KUNIT_SUBTEST_INDENT "Expected %s to be false, but is true\n", + unary_assert->condition); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_unary_assert_format); @@ -72,13 +73,13 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, kunit_base_assert_format(assert, stream); if (!ptr_assert->value) { string_stream_add(stream, - "\tExpected %s is not null, but is\n", - ptr_assert->text); + KUNIT_SUBTEST_INDENT "Expected %s is not null, but is\n", + ptr_assert->text); } else if (IS_ERR(ptr_assert->value)) { string_stream_add(stream, - "\tExpected %s is not error, but is: %ld\n", - ptr_assert->text, - PTR_ERR(ptr_assert->value)); + KUNIT_SUBTEST_INDENT "Expected %s is not error, but is: %ld\n", + ptr_assert->text, + PTR_ERR(ptr_assert->value)); } kunit_assert_print_msg(assert, stream); } @@ -92,16 +93,16 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, kunit_base_assert_format(assert, stream); string_stream_add(stream, - "\tExpected %s %s %s, but\n", - binary_assert->left_text, - binary_assert->operation, - binary_assert->right_text); - string_stream_add(stream, "\t\t%s == %lld\n", - binary_assert->left_text, - binary_assert->left_value); - string_stream_add(stream, "\t\t%s == %lld", - binary_assert->right_text, - binary_assert->right_value); + KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", + binary_assert->left_text, + binary_assert->operation, + binary_assert->right_text); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", + binary_assert->left_text, + binary_assert->left_value); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", + binary_assert->right_text, + binary_assert->right_value); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_binary_assert_format); @@ -114,16 +115,16 @@ void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, kunit_base_assert_format(assert, stream); string_stream_add(stream, - "\tExpected %s %s %s, but\n", - binary_assert->left_text, - binary_assert->operation, - binary_assert->right_text); - string_stream_add(stream, "\t\t%s == %px\n", - binary_assert->left_text, - binary_assert->left_value); - string_stream_add(stream, "\t\t%s == %px", - binary_assert->right_text, - binary_assert->right_value); + KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", + binary_assert->left_text, + binary_assert->operation, + binary_assert->right_text); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %px\n", + binary_assert->left_text, + binary_assert->left_value); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %px", + binary_assert->right_text, + binary_assert->right_value); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_binary_ptr_assert_format); @@ -136,16 +137,16 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, kunit_base_assert_format(assert, stream); string_stream_add(stream, - "\tExpected %s %s %s, but\n", - binary_assert->left_text, - binary_assert->operation, - binary_assert->right_text); - string_stream_add(stream, "\t\t%s == %s\n", - binary_assert->left_text, - binary_assert->left_value); - string_stream_add(stream, "\t\t%s == %s", - binary_assert->right_text, - binary_assert->right_value); + KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", + binary_assert->left_text, + binary_assert->operation, + binary_assert->right_text); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %s\n", + binary_assert->left_text, + binary_assert->left_value); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %s", + binary_assert->right_text, + binary_assert->right_value); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_binary_str_assert_format); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index a3fa21f9d918..7a6430a7fca0 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -69,8 +69,9 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_subtest_start(struct kunit_suite *suite) { kunit_print_tap_version(); - kunit_log(KERN_INFO, suite, "\t# Subtest: %s", suite->name); - kunit_log(KERN_INFO, suite, "\t1..%zd", + kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", + suite->name); + kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", kunit_suite_num_test_cases(suite)); } @@ -96,7 +97,7 @@ static void kunit_print_ok_not_ok(void *test_or_suite, kunit_status_to_string(is_ok), test_number, description); else - kunit_log(KERN_INFO, test, "\t%s %zd - %s", + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT "%s %zd - %s", kunit_status_to_string(is_ok), test_number, description); } diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 02815062d5a6..64aac9dcd431 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -94,7 +94,7 @@ def print_log(log): for m in log: print_with_timestamp(m) -TAP_ENTRIES = re.compile(r'^(TAP|\t?ok|\t?not ok|\t?[0-9]+\.\.[0-9]+|\t?#).*$') +TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$') def consume_non_diagnositic(lines: List[str]) -> None: while lines and not TAP_ENTRIES.match(lines[0]): @@ -107,7 +107,7 @@ def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None: OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) -OK_NOT_OK_SUBTEST = re.compile(r'^\t(ok|not ok) [0-9]+ - (.*)$') +OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$') @@ -134,7 +134,7 @@ def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: else: return False -SUBTEST_DIAGNOSTIC = re.compile(r'^\t# .*?: (.*)$') +SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$') DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!' def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: @@ -161,7 +161,7 @@ def parse_test_case(lines: List[str]) -> TestCase: else: return None -SUBTEST_HEADER = re.compile(r'^\t# Subtest: (.*)$') +SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') def parse_subtest_header(lines: List[str]) -> str: consume_non_diagnositic(lines) @@ -174,7 +174,7 @@ def parse_subtest_header(lines: List[str]) -> str: else: return None -SUBTEST_PLAN = re.compile(r'\t[0-9]+\.\.([0-9]+)') +SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') def parse_subtest_plan(lines: List[str]) -> int: consume_non_diagnositic(lines) -- cgit v1.2.3 From e23349af9ee25a5760112a2f8476b94a4ec86f1c Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Mon, 23 Mar 2020 12:22:36 -0700 Subject: kunit: tool: add missing test data file content Add a missing raw dmesg test log to test the kunit_tool's dmesg parser. test_prefix_poundsign and test_output_with_prefix_isolated_correctly fail without this test log. Signed-off-by: Brendan Higgins Tested-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/test_data/test_pound_sign.log | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log index e69de29bb2d1..28ffa5ba03bf 100644 --- a/tools/testing/kunit/test_data/test_pound_sign.log +++ b/tools/testing/kunit/test_data/test_pound_sign.log @@ -0,0 +1,33 @@ +[ 0.060000] printk: console [mc-1] enabled +[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 +[ 0.060000] TAP version 14 +[ 0.060000] # Subtest: kunit-resource-test +[ 0.060000] 1..5 +[ 0.060000] ok 1 - kunit_resource_test_init_resources +[ 0.060000] ok 2 - kunit_resource_test_alloc_resource +[ 0.060000] ok 3 - kunit_resource_test_destroy_resource +[ 0.060000] foo bar # +[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources +[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering +[ 0.060000] ok 1 - kunit-resource-test +[ 0.060000] foo bar # non-kunit output +[ 0.060000] # Subtest: kunit-try-catch-test +[ 0.060000] 1..2 +[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch +[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch +[ 0.060000] ok 2 - kunit-try-catch-test +[ 0.060000] # Subtest: string-stream-test +[ 0.060000] 1..3 +[ 0.060000] ok 1 - string_stream_test_empty_on_creation +[ 0.060000] ok 2 - string_stream_test_not_empty_after_add +[ 0.060000] ok 3 - string_stream_test_get_string +[ 0.060000] ok 3 - string-stream-test +[ 0.060000] List of all partitions: +[ 0.060000] No filesystem could mount root, tried: +[ 0.060000] +[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0) +[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2 +[ 0.060000] Stack: +[ 0.060000] 602086f8 601bc260 705c0000 705c0000 +[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 -- cgit v1.2.3 From f46f576280595f7c8feba454526b95a54e635798 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Mar 2020 16:12:51 -0700 Subject: selftests/harness: Move test child waiting logic In order to better handle timeout failures, rearrange the child waiting logic into a separate function. This is mostly a copy/paste with an indentation change. To handle pid tracking, a new field is added for the child pid. Also move the alarm() pairing into the function. Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 93 +++++++++++++++-------------- 1 file changed, 49 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5336b26506ab..c7b67e379219 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -635,6 +635,7 @@ struct __test_metadata { const char *name; void (*fn)(struct __test_metadata *); + pid_t pid; /* pid of test when being run */ int termsig; int passed; int trigger; /* extra handler after the evaluation */ @@ -695,64 +696,68 @@ static inline int __bail(int for_realz, bool no_print, __u8 step) return 0; } -void __run_test(struct __test_metadata *t) +void __wait_for_test(struct __test_metadata *t) { - pid_t child_pid; int status; + alarm(t->timeout); + waitpid(t->pid, &status, 0); + alarm(0); + + if (WIFEXITED(status)) { + t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; + if (t->termsig != -1) { + fprintf(TH_LOG_STREAM, + "%s: Test exited normally " + "instead of by signal (code: %d)\n", + t->name, + WEXITSTATUS(status)); + } else if (!t->passed) { + fprintf(TH_LOG_STREAM, + "%s: Test failed at step #%d\n", + t->name, + WEXITSTATUS(status)); + } + } else if (WIFSIGNALED(status)) { + t->passed = 0; + if (WTERMSIG(status) == SIGABRT) { + fprintf(TH_LOG_STREAM, + "%s: Test terminated by assertion\n", + t->name); + } else if (WTERMSIG(status) == t->termsig) { + t->passed = 1; + } else { + fprintf(TH_LOG_STREAM, + "%s: Test terminated unexpectedly " + "by signal %d\n", + t->name, + WTERMSIG(status)); + } + } else { + fprintf(TH_LOG_STREAM, + "%s: Test ended in some other way [%u]\n", + t->name, + status); + } +} + +void __run_test(struct __test_metadata *t) +{ t->passed = 1; t->trigger = 0; printf("[ RUN ] %s\n", t->name); - alarm(t->timeout); - child_pid = fork(); - if (child_pid < 0) { + t->pid = fork(); + if (t->pid < 0) { printf("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; - } else if (child_pid == 0) { + } else if (t->pid == 0) { t->fn(t); /* return the step that failed or 0 */ _exit(t->passed ? 0 : t->step); } else { - /* TODO(wad) add timeout support. */ - waitpid(child_pid, &status, 0); - if (WIFEXITED(status)) { - t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; - if (t->termsig != -1) { - fprintf(TH_LOG_STREAM, - "%s: Test exited normally " - "instead of by signal (code: %d)\n", - t->name, - WEXITSTATUS(status)); - } else if (!t->passed) { - fprintf(TH_LOG_STREAM, - "%s: Test failed at step #%d\n", - t->name, - WEXITSTATUS(status)); - } - } else if (WIFSIGNALED(status)) { - t->passed = 0; - if (WTERMSIG(status) == SIGABRT) { - fprintf(TH_LOG_STREAM, - "%s: Test terminated by assertion\n", - t->name); - } else if (WTERMSIG(status) == t->termsig) { - t->passed = 1; - } else { - fprintf(TH_LOG_STREAM, - "%s: Test terminated unexpectedly " - "by signal %d\n", - t->name, - WTERMSIG(status)); - } - } else { - fprintf(TH_LOG_STREAM, - "%s: Test ended in some other way [%u]\n", - t->name, - status); - } + __wait_for_test(t); } printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); - alarm(0); } static int test_harness_run(int __attribute__((unused)) argc, -- cgit v1.2.3 From c31801da6e3d7ef6115a0b5af1816a8f7ab120c2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Mar 2020 16:12:52 -0700 Subject: selftests/harness: Handle timeouts cleanly When a selftest would timeout before, the program would just fall over and no accounting of failures would be reported (i.e. it would result in an incomplete TAP report). Instead, add an explicit SIGALRM handler to cleanly catch and report the timeout. Before: [==========] Running 2 tests from 2 test cases. [ RUN ] timeout.finish [ OK ] timeout.finish [ RUN ] timeout.too_long Alarm clock After: [==========] Running 2 tests from 2 test cases. [ RUN ] timeout.finish [ OK ] timeout.finish [ RUN ] timeout.too_long timeout.too_long: Test terminated by timeout [ FAIL ] timeout.too_long [==========] 1 / 2 tests passed. [ FAILED ] Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 53 +++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index c7b67e379219..2902f6a78f8a 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -639,7 +639,8 @@ struct __test_metadata { int termsig; int passed; int trigger; /* extra handler after the evaluation */ - int timeout; + int timeout; /* seconds to wait for test timeout */ + bool timed_out; /* did this test timeout instead of exiting? */ __u8 step; bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ struct __test_metadata *prev, *next; @@ -696,15 +697,63 @@ static inline int __bail(int for_realz, bool no_print, __u8 step) return 0; } +struct __test_metadata *__active_test; +static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) +{ + struct __test_metadata *t = __active_test; + + /* Sanity check handler execution environment. */ + if (!t) { + fprintf(TH_LOG_STREAM, + "no active test in SIGARLM handler!?\n"); + abort(); + } + if (sig != SIGALRM || sig != info->si_signo) { + fprintf(TH_LOG_STREAM, + "%s: SIGALRM handler caught signal %d!?\n", + t->name, sig != SIGALRM ? sig : info->si_signo); + abort(); + } + + t->timed_out = true; + kill(t->pid, SIGKILL); +} + void __wait_for_test(struct __test_metadata *t) { + struct sigaction action = { + .sa_sigaction = __timeout_handler, + .sa_flags = SA_SIGINFO, + }; + struct sigaction saved_action; int status; + if (sigaction(SIGALRM, &action, &saved_action)) { + t->passed = 0; + fprintf(TH_LOG_STREAM, + "%s: unable to install SIGARLM handler\n", + t->name); + return; + } + __active_test = t; + t->timed_out = false; alarm(t->timeout); waitpid(t->pid, &status, 0); alarm(0); + if (sigaction(SIGALRM, &saved_action, NULL)) { + t->passed = 0; + fprintf(TH_LOG_STREAM, + "%s: unable to uninstall SIGARLM handler\n", + t->name); + return; + } + __active_test = NULL; - if (WIFEXITED(status)) { + if (t->timed_out) { + t->passed = 0; + fprintf(TH_LOG_STREAM, + "%s: Test terminated by timeout\n", t->name); + } else if (WIFEXITED(status)) { t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; if (t->termsig != -1) { fprintf(TH_LOG_STREAM, -- cgit v1.2.3 From 860f0a7792a5abcaae047a114a8b24807ce0880f Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 25 Mar 2020 17:16:02 -0600 Subject: selftests: Fix seccomp to support relocatable build (O=objdir) Fix seccomp relocatable builds. This is a simple fix to use the right lib.mk variable TEST_GEN_PROGS. Local header dependency is addressed in a change to lib.mk as a framework change that enforces the dependency without requiring changes to individual tests. The following use-cases work with this change: In seccomp directory: make all and make clean From top level from main Makefile: make kselftest-install O=objdir ARCH=arm64 HOSTCC=gcc \ CROSS_COMPILE=aarch64-linux-gnu- TARGETS=seccomp Signed-off-by: Shuah Khan Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/Makefile | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 1760b3e39730..0ebfe8b0e147 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,17 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -all: - -include ../lib.mk - -.PHONY: all clean - -BINARIES := seccomp_bpf seccomp_benchmark CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread -seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@ - -TEST_PROGS += $(BINARIES) -EXTRA_CLEAN := $(BINARIES) - -all: $(BINARIES) +TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark +include ../lib.mk -- cgit v1.2.3 From d3fd949abd3e3005dab7ccb185c3c8bc0ad639f1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 25 Mar 2020 17:17:09 -0600 Subject: selftests: Fix memfd to support relocatable build (O=objdir) Fix memfd to support relocatable build (O=objdir). This calls out source files necessary to build tests and simplfies the dependency enforcement. Tested the following: Note that cross-build for fuse_mnt has dependency on -lfuse. make all make clean make kselftest-install O=/arm64_build/ ARCH=arm64 HOSTCC=gcc \ CROSS_COMPILE=aarch64-linux-gnu- TARGETS=memfd Signed-off-by: Shuah Khan --- tools/testing/selftests/memfd/Makefile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 53a848109f7b..0a15f9e23431 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -4,9 +4,8 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -TEST_GEN_PROGS := memfd_test +TEST_GEN_PROGS := memfd_test fuse_test fuse_mnt TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh -TEST_GEN_FILES := fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) @@ -14,7 +13,7 @@ include ../lib.mk $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs) -$(OUTPUT)/memfd_test: memfd_test.c common.o -$(OUTPUT)/fuse_test: fuse_test.c common.o +$(OUTPUT)/memfd_test: memfd_test.c common.c +$(OUTPUT)/fuse_test: fuse_test.c common.c -EXTRA_CLEAN = common.o +EXTRA_CLEAN = $(OUTPUT)/common.o -- cgit v1.2.3 From 1056d3d2c97e47397d0037cbbdf24235ae8f88cb Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 25 Mar 2020 17:16:49 -0600 Subject: selftests: enforce local header dependency in lib.mk Add local header dependency in lib.mk. This enforces the dependency blindly even when a test doesn't include the file, with the benefit of a simpler common logic without requiring individual tests to have special rule for it. Signed-off-by: Shuah Khan Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 1c8a1963d03f..70f03dff5479 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -134,7 +134,8 @@ endif # Selftest makefiles can override those targets by setting # OVERRIDE_TARGETS = 1. ifeq ($(OVERRIDE_TARGETS),) -$(OUTPUT)/%:%.c +LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h +$(OUTPUT)/%:%.c $(LOCAL_HDRS) $(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S -- cgit v1.2.3 From 2a0b1307cb4687bb930baed044900f91c6a2b8c1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 26 Mar 2020 22:45:57 +0200 Subject: selftests: skbedit_priority: Test counters at the skbedit rule Currently the test checks the observable effect of skbedit priority: queueing of packets at the correct qdisc band. It therefore misses the fact that the counters for offloaded rules are not updated. Add an extra check for the counter. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/skbedit_priority.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index 0e7693297765..e3bd8a6bb8b4 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -120,14 +120,19 @@ test_skbedit_priority_one() flower action skbedit priority $prio local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets) + local pkt2=$(tc_rule_handle_stats_get "$locus" 101) $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \ -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q + local pkt1 pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \ qdisc_parent_stats_get $swp2 $classid .packets) + check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))." + + local pkt3=$(tc_rule_handle_stats_get "$locus" 101) + ((pkt3 >= pkt2 + 10)) + check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))." - check_err $? "Expected to get 10 packets on class $classid, but got -$((pkt1 - pkt0))." log_test "$locus skbedit priority $prio -> classid $classid" tc filter del $locus pref 1 -- cgit v1.2.3 From 3fe0fd531a35c229f8309c8b74ef0226de2626ab Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 26 Mar 2020 11:37:17 -0700 Subject: netdevsim: support taking immediate snapshot via devlink Implement the .snapshot region operation for the dummy data region. This enables a region snapshot to be taken upon request via the new DEVLINK_CMD_REGION_SNAPSHOT command. Signed-off-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 28 +++++++++++++++++----- .../selftests/drivers/net/netdevsim/devlink.sh | 10 ++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index f4f6539f1e17..2b727a7001f6 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -39,23 +39,38 @@ static struct dentry *nsim_dev_ddir; #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) +static int +nsim_dev_take_snapshot(struct devlink *devlink, struct netlink_ext_ack *extack, + u8 **data) +{ + void *dummy_data; + + dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); + if (!dummy_data) + return -ENOMEM; + + get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + + *data = dummy_data; + + return 0; +} + static ssize_t nsim_dev_take_snapshot_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct nsim_dev *nsim_dev = file->private_data; struct devlink *devlink; - void *dummy_data; + u8 *dummy_data; int err; u32 id; devlink = priv_to_devlink(nsim_dev); - dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); - if (!dummy_data) - return -ENOMEM; - - get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + err = nsim_dev_take_snapshot(devlink, NULL, &dummy_data); + if (err) + return err; err = devlink_region_snapshot_id_get(devlink, &id); if (err) { @@ -351,6 +366,7 @@ static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink) static const struct devlink_region_ops dummy_region_ops = { .name = "dummy", .destructor = &kfree, + .snapshot = nsim_dev_take_snapshot, }; static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 025a84c2ab5a..32cb2a159c70 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -141,6 +141,16 @@ regions_test() check_region_snapshot_count dummy post-first-delete 2 + devlink region new $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to create a new snapshot with id 25" + + check_region_snapshot_count dummy post-first-request 3 + + devlink region del $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to delete snapshot with id 25" + + check_region_snapshot_count dummy post-second-delete 2 + log_test "regions test" } -- cgit v1.2.3 From 21114b7feec29e4425a3ac48a037569c016a46c8 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 25 Mar 2020 15:52:33 +0300 Subject: net: macsec: add support for offloading to the MAC This patch adds a new MACsec offloading option, MACSEC_OFFLOAD_MAC, allowing a user to select a MAC as a provider for MACsec offloading operations. Signed-off-by: Antoine Tenart Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/macsec.c | 13 +++++++++++-- include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 35abebe95b79..d29c072e19af 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -339,7 +339,8 @@ static void macsec_set_shortlen(struct macsec_eth_header *h, size_t data_len) /* Checks if a MACsec interface is being offloaded to an hardware engine */ static bool macsec_is_offloaded(struct macsec_dev *macsec) { - if (macsec->offload == MACSEC_OFFLOAD_PHY) + if (macsec->offload == MACSEC_OFFLOAD_MAC || + macsec->offload == MACSEC_OFFLOAD_PHY) return true; return false; @@ -355,6 +356,9 @@ static bool macsec_check_offload(enum macsec_offload offload, if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev && macsec->real_dev->phydev->macsec_ops; + else if (offload == MACSEC_OFFLOAD_MAC) + return macsec->real_dev->features & NETIF_F_HW_MACSEC && + macsec->real_dev->macsec_ops; return false; } @@ -369,9 +373,14 @@ static const struct macsec_ops *__macsec_get_ops(enum macsec_offload offload, if (offload == MACSEC_OFFLOAD_PHY) ctx->phydev = macsec->real_dev->phydev; + else if (offload == MACSEC_OFFLOAD_MAC) + ctx->netdev = macsec->real_dev; } - return macsec->real_dev->phydev->macsec_ops; + if (offload == MACSEC_OFFLOAD_PHY) + return macsec->real_dev->phydev->macsec_ops; + else + return macsec->real_dev->macsec_ops; } /* Returns a pointer to the MACsec ops struct if any and updates the MACsec diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 61e0801c82df..d6ccd0105c05 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -489,6 +489,7 @@ enum macsec_validation_type { enum macsec_offload { MACSEC_OFFLOAD_OFF = 0, MACSEC_OFFLOAD_PHY = 1, + MACSEC_OFFLOAD_MAC = 2, __MACSEC_OFFLOAD_END, MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 024af2d1d0af..771371d5b996 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -489,6 +489,7 @@ enum macsec_validation_type { enum macsec_offload { MACSEC_OFFLOAD_OFF = 0, MACSEC_OFFLOAD_PHY = 1, + MACSEC_OFFLOAD_MAC = 2, __MACSEC_OFFLOAD_END, MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, }; -- cgit v1.2.3 From 315a4af8cd2469fcf5dacc58b5e73944b4f0c561 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Tue, 4 Feb 2020 09:14:24 -0800 Subject: x86/insn: Add Control-flow Enforcement (CET) instructions to the opcode map Add the following CET instructions to the opcode map: INCSSP: Increment Shadow Stack pointer (SSP). RDSSP: Read SSP into a GPR. SAVEPREVSSP: Use "previous ssp" token at top of current Shadow Stack (SHSTK) to create a "restore token" on the previous (outgoing) SHSTK. RSTORSSP: Restore from a "restore token" to SSP. WRSS: Write to kernel-mode SHSTK (kernel-mode instruction). WRUSS: Write to user-mode SHSTK (kernel-mode instruction). SETSSBSY: Verify the "supervisor token" pointed by MSR_IA32_PL0_SSP, set the token busy, and set then Shadow Stack pointer(SSP) to the value of MSR_IA32_PL0_SSP. CLRSSBSY: Verify the "supervisor token" and clear its busy bit. ENDBR64/ENDBR32: Mark a valid 64/32 bit control transfer endpoint. Detailed information of CET instructions can be found in Intel Software Developer's Manual. Signed-off-by: Yu-cheng Yu Reviewed-by: Adrian Hunter Reviewed-by: Tony Luck Acked-by: Masami Hiramatsu Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Ravi v. Shankar Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20200204171425.28073-2-yu-cheng.yu@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ tools/arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH -- cgit v1.2.3 From 26cec7480e7edaa9dead4912055078b98c1d192e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Feb 2020 09:14:25 -0800 Subject: perf test x86: Add CET instructions to the new instructions test Add to the "x86 instruction decoder - new instructions" test the following instructions: incsspd incsspq rdsspd rdsspq saveprevssp rstorssp wrssd wrssq wrussd wrussq setssbsy clrssbsy endbr32 endbr64 And the "notrack" prefix for indirect calls and jumps. For information about the instructions, refer Intel Control-flow Enforcement Technology Specification May 2019 (334525-003). Committer testing: $ perf test instr 67: x86 instruction decoder - new instructions : Ok $ Then use verbose mode and check one of those new instructions: $ perf test -v instr |& grep saveprevssp Decoded ok: f3 0f 01 ea saveprevssp Decoded ok: f3 0f 01 ea saveprevssp $ Signed-off-by: Adrian Hunter Acked-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ravi v. Shankar Cc: Thomas Gleixner Cc: Tony Luck Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20200204171425.28073-3-yu-cheng.yu@intel.com Signed-off-by: Yu-cheng Yu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 112 +++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-64.c | 196 ++++++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 236 +++++++++++++++++++++++++++ 3 files changed, 544 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index e6461abc9e7b..9708ae892061 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2085,6 +2085,118 @@ "67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "", "67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%eax)",}, +{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f ae 2d 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%eax)",}, +{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%eax)",}, +{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%eax)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcall *%eax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcall *(%eax)",}, +{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect", +"ff 15 78 56 34 12 \tcall *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd call *%eax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack call *%eax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack call *(%eax)",}, +{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd call *%eax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd call *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmp *%eax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmp *(%eax)",}, +{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect", +"ff 25 78 56 34 12 \tjmp *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmp *%eax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmp *(%eax)",}, +{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmp *%eax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmp *(%eax)",}, +{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmp *%eax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 567ecccfad7c..5da17d41d302 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2263,6 +2263,202 @@ "67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", "67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 41 0f ae e8 \tincsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 48 0f ae e8 \tincsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 49 0f ae e8 \tincsspq %r8",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%rax)",}, +{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "", +"41 0f ae 28 \txrstor (%r8)",}, +{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 41 0f 1e c8 \trdsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 48 0f 1e c8 \trdsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 49 0f 1e c8 \trdsspq %r8",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%rax)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "", +"f3 41 0f 01 28 \trstorssp (%r8)",}, +{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%rax)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"41 0f 38 f6 10 \twrssd %edx,(%r8)",}, +{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "", +"48 0f 38 f6 08 \twrssq %rcx,(%rax)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"49 0f 38 f6 10 \twrssq %rdx,(%r8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "", +"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"f3 41 0f ae 30 \tclrssbsy (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcallq *%rax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcallq *(%rax)",}, +{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect", +"41 ff 10 \tcallq *(%r8)",}, +{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 14 25 78 56 34 12 \tcallq *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd callq *%rax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd callq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"f2 41 ff 10 \tbnd callq *(%r8)",}, +{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack callq *%rax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack callq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e 41 ff 10 \tnotrack callq *(%r8)",}, +{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd callq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd callq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect", +"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect", +"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmpq *%rax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmpq *(%rax)",}, +{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"41 ff 20 \tjmpq *(%r8)",}, +{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff 24 25 78 56 34 12 \tjmpq *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmpq *%rax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmpq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"f2 41 ff 20 \tbnd jmpq *(%r8)",}, +{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmpq *%rax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmpq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e 41 ff 20 \tnotrack jmpq *(%r8)",}, +{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmpq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect", +"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", +"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index ddbf07c50bb8..c3808e94c46e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1771,6 +1771,145 @@ int main(void) asm volatile("enqcmds (%eax),%ebx"); asm volatile("enqcmds 0x12345678(%eax),%ecx"); + /* incsspd/q */ + + asm volatile("incsspd %eax"); + asm volatile("incsspd %r8d"); + asm volatile("incsspq %rax"); + asm volatile("incsspq %r8"); + /* Also check instructions in the same group encoding as incsspd/q */ + asm volatile("xrstor (%rax)"); + asm volatile("xrstor (%r8)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstor 0x12345678(%r8,%rcx,8)"); + asm volatile("lfence"); + + /* rdsspd/q */ + + asm volatile("rdsspd %eax"); + asm volatile("rdsspd %r8d"); + asm volatile("rdsspq %rax"); + asm volatile("rdsspq %r8"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%rax)"); + asm volatile("rstorssp (%r8)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%rax,%rcx,8)"); + asm volatile("rstorssp 0x12345678(%r8,%rcx,8)"); + + /* wrssd/q */ + + asm volatile("wrssd %ecx,(%rax)"); + asm volatile("wrssd %edx,(%r8)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrssq %rcx,(%rax)"); + asm volatile("wrssq %rdx,(%r8)"); + asm volatile("wrssq %rdx,(0x12345678)"); + asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)"); + + /* wrussd/q */ + + asm volatile("wrussd %ecx,(%rax)"); + asm volatile("wrussd %edx,(%r8)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrussq %rcx,(%rax)"); + asm volatile("wrussq %rdx,(%r8)"); + asm volatile("wrussq %rdx,(0x12345678)"); + asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%rax)"); + asm volatile("clrssbsy (%r8)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)"); + asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3434,6 +3573,103 @@ int main(void) asm volatile("enqcmds (%si),%bx"); asm volatile("enqcmds 0x1234(%si),%cx"); + /* incsspd */ + + asm volatile("incsspd %eax"); + /* Also check instructions in the same group encoding as incsspd */ + asm volatile("xrstor (%eax)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%eax,%ecx,8)"); + asm volatile("lfence"); + + /* rdsspd */ + + asm volatile("rdsspd %eax"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%eax)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%eax,%ecx,8)"); + + /* wrssd */ + + asm volatile("wrssd %ecx,(%eax)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)"); + + /* wrussd */ + + asm volatile("wrussd %ecx,(%eax)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%eax)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + #endif /* #ifndef __x86_64__ */ /* SGX */ -- cgit v1.2.3 From 26567ed79d13ec54b2c5661ce8a07fd149a23a9b Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 4 Feb 2020 18:37:09 +0100 Subject: perf script: Introduce --deltatime option For some kind of analysis a deltatime output is more human friendly and reduce the cognitive load for further analysis. The following output demonstrate the new option "deltatime": calculate the time difference in relation to the previous event. $ perf script --deltatime test 2525 [001] 0.000000: sdt_libev:ev_add: (5635e72a5ebd) test 2525 [001] 0.000091: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000051: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000685: sdt_libev:ev_add: (5635e72a5ebd) test 2525 [001] 0.000048: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000104: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.003895: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.996034: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000058: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000004: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000064: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.999934: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000056: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.999930: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 Committer testing: So go from default output to --reltime and then this new --deltatime, to contrast the various timestamp presentation modes for a random perf.data file I had laying around: [root@five ~]# perf script --reltime | head perf 442394 [000] 0.000000: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000004: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000006: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000009: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000036: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000038: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000040: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000041: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000044: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# perf script --deltatime | head perf 442394 [000] 0.000000: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000001: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000001: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000027: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000001: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000001: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000002: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# perf script | head perf 442394 [000] 7600.157861: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157864: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157866: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157867: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157870: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157897: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157900: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157901: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157903: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157906: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# Andi suggested we better implement it as a new field, i.e. -F deltatime, like: [root@five ~]# perf script -F deltatime Invalid field requested. Usage: perf script [] or: perf script [] record