diff options
Diffstat (limited to 'tools')
325 files changed, 22606 insertions, 2503 deletions
diff --git a/tools/Makefile b/tools/Makefile index 6339f6ac3ccb..60c7e6c8ff17 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -13,6 +13,7 @@ help: @echo ' cpupower - a tool for all things x86 CPU power' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @echo ' freefall - laptop accelerometer program for disk protection' + @echo ' gpio - GPIO tools' @echo ' hv - tools used when in Hyper-V clients' @echo ' iio - IIO tools' @echo ' lguest - a minimal 32-bit x86 hypervisor' @@ -20,6 +21,7 @@ help: @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @echo ' spi - spi tools' + @echo ' objtool - an ELF object analysis tool' @echo ' tmon - thermal monitoring and tuning tool' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @@ -53,7 +55,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm net iio: FORCE +cgroup firewire hv guest spi usb virtio vm net iio gpio objtool: FORCE $(call descend,$@) liblockdep: FORCE @@ -85,7 +87,7 @@ freefall: FORCE all: acpi cgroup cpupower hv firewire lguest \ perf selftests turbostat usb \ virtio vm net x86_energy_perf_policy \ - tmon freefall + tmon freefall objtool acpi_install: $(call descend,power/$(@:_install=),install) @@ -93,7 +95,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install: +cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install: $(call descend,$(@:_install=),install) selftests_install: @@ -111,7 +113,7 @@ freefall_install: install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install 
net_install x86_energy_perf_policy_install \ - tmon_install freefall_install + tmon_install freefall_install objtool_install acpi_clean: $(call descend,power/acpi,clean) @@ -119,7 +121,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean: +cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -155,6 +157,7 @@ build_clean: clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ - freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean + freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ + gpio_clean objtool_clean .PHONY: FORCE diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 4a96473b180f..ee566e8bd1cf 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) -$(OUTPUT)%.i: %.S FORCE +$(OUTPUT)%.s: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 02db3cdff20f..6b7707270aa3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -27,7 +27,7 @@ endef # the rule that uses them - an example for that is the 'bionic' # feature check. 
] # -FEATURE_TESTS ?= \ +FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ fortify-source \ @@ -46,6 +46,7 @@ FEATURE_TESTS ?= \ libpython \ libpython-version \ libslang \ + libcrypto \ libunwind \ pthread-attr-setaffinity-np \ stackprotector-all \ @@ -56,6 +57,25 @@ FEATURE_TESTS ?= \ get_cpuid \ bpf +# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list +# of all feature tests +FEATURE_TESTS_EXTRA := \ + bionic \ + compile-32 \ + compile-x32 \ + cplus-demangle \ + hello \ + libbabeltrace \ + liberty \ + liberty-z \ + libunwind-debug-frame + +FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) + +ifeq ($(FEATURE_TESTS),all) + FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA) +endif + FEATURE_DISPLAY ?= \ dwarf \ glibc \ @@ -68,6 +88,7 @@ FEATURE_DISPLAY ?= \ libperl \ libpython \ libslang \ + libcrypto \ libunwind \ libdw-dwarf-unwind \ zlib \ @@ -100,6 +121,14 @@ ifeq ($(feature-all), 1) # test-all.c passed - just set all the core feature flags to 1: # $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) + # + # test-all.c does not comprise these tests, so we need to + # for this case to get features proper values + # + $(call feature_check,compile-32) + $(call feature_check,compile-x32) + $(call feature_check,bionic) + $(call feature_check,libbabeltrace) else $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) endif diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index bf8f0352264d..c5f4c417428d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -23,6 +23,7 @@ FILES= \ test-libpython.bin \ test-libpython-version.bin \ test-libslang.bin \ + test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ test-pthread-attr-setaffinity-np.bin \ @@ -105,6 +106,9 @@ $(OUTPUT)test-libaudit.bin: $(OUTPUT)test-libslang.bin: $(BUILD) -I/usr/include/slang -lslang +$(OUTPUT)test-libcrypto.bin: + $(BUILD) -lcrypto + $(OUTPUT)test-gtk2.bin: $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags 
gtk+-2.0 2>/dev/null) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 81025cade45f..e499a36c1e4a 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -129,6 +129,10 @@ # include "test-bpf.c" #undef main +#define main main_test_libcrypto +# include "test-libcrypto.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -158,6 +162,7 @@ int main(int argc, char *argv[]) main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); + main_test_libcrypto(); return 0; } diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..c54e6551ae4c 100644 --- a/tools/build/feature/test-compile.c +++ b/tools/build/feature/test-compile.c @@ -1,4 +1,6 @@ +#include <stdio.h> int main(void) { + printf("Hello World!\n"); return 0; } diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c new file mode 100644 index 000000000000..bd79dc7f28d3 --- /dev/null +++ b/tools/build/feature/test-libcrypto.c @@ -0,0 +1,17 @@ +#include <openssl/sha.h> +#include <openssl/md5.h> + +int main(void) +{ + MD5_CTX context; + unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; + unsigned char dat[] = "12345"; + + MD5_Init(&context); + MD5_Update(&context, &dat[0], sizeof(dat)); + MD5_Final(&md[0], &context); + + SHA1(&dat[0], sizeof(dat), &md[0]); + + return 0; +} diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile new file mode 100644 index 000000000000..4d198d5c4203 --- /dev/null +++ b/tools/gpio/Makefile @@ -0,0 +1,12 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS += -Wall -g -D_GNU_SOURCE + +all: lsgpio + +lsgpio: lsgpio.o gpio-utils.o + +%.o: %.c gpio-utils.h + +.PHONY: clean +clean: + rm -f *.o lsgpio diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c new file mode 100644 index 000000000000..8208718f2c99 --- /dev/null +++ b/tools/gpio/gpio-utils.c @@ -0,0 +1,11 @@ +/* + * GPIO tools - helpers library for the GPIO tools + * + * 
Copyright (C) 2015 Linus Walleij + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include "gpio-utils.h" diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h new file mode 100644 index 000000000000..5f57133b8c04 --- /dev/null +++ b/tools/gpio/gpio-utils.h @@ -0,0 +1,27 @@ +/* + * GPIO tools - utility helpers library for the GPIO tools + * + * Copyright (C) 2015 Linus Walleij + * + * Portions copied from iio_utils and lssio: + * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de> + * Copyright (c) 2008 Jonathan Cameron + * * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#ifndef _GPIO_UTILS_H_ +#define _GPIO_UTILS_H_ + +#include <string.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static inline int check_prefix(const char *str, const char *prefix) +{ + return strlen(str) > strlen(prefix) && + strncmp(str, prefix, strlen(prefix)) == 0; +} + +#endif /* _GPIO_UTILS_H_ */ diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c new file mode 100644 index 000000000000..1124da375942 --- /dev/null +++ b/tools/gpio/lsgpio.c @@ -0,0 +1,195 @@ +/* + * lsgpio - example on how to list the GPIO lines on a system + * + * Copyright (C) 2015 Linus Walleij + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * Usage: + * lsgpio <-n device-name> + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <poll.h> +#include <fcntl.h> +#include <getopt.h> +#include <sys/ioctl.h> +#include <linux/gpio.h> + +#include "gpio-utils.h" + +struct gpio_flag { + char *name; + unsigned long mask; +}; + +struct gpio_flag flagnames[] = { + { + .name = "kernel", + .mask = GPIOLINE_FLAG_KERNEL, + }, + { + .name = "output", + .mask = GPIOLINE_FLAG_IS_OUT, + }, + { + .name = "active-low", + .mask = GPIOLINE_FLAG_ACTIVE_LOW, + }, + { + .name = "open-drain", + .mask = GPIOLINE_FLAG_OPEN_DRAIN, + }, + { + .name = "open-source", + .mask = GPIOLINE_FLAG_OPEN_SOURCE, + }, +}; + +void print_flags(unsigned long flags) +{ + int i; + int printed = 0; + + for (i = 0; i < ARRAY_SIZE(flagnames); i++) { + if (flags & flagnames[i].mask) { + if (printed) + fprintf(stdout, " "); + fprintf(stdout, "%s", flagnames[i].name); + printed++; + } + } +} + +int list_device(const char *device_name) +{ + struct gpiochip_info cinfo; + char *chrdev_name; + int fd; + int ret; + int i; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s\n", chrdev_name); + goto exit_close_error; + } + + /* Inspect this GPIO chip */ + ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &cinfo); + if (ret == -1) { + ret = -errno; + perror("Failed to issue CHIPINFO IOCTL\n"); + goto exit_close_error; + } + fprintf(stdout, "GPIO chip: %s, \"%s\", %u GPIO lines\n", + cinfo.name, cinfo.label, cinfo.lines); + + /* Loop over the lines and print info */ + for (i = 0; i < cinfo.lines; i++) { + struct gpioline_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.line_offset = i; + + ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo); + if (ret == -1) { + ret = -errno; + perror("Failed to issue LINEINFO 
IOCTL\n"); + goto exit_close_error; + } + fprintf(stdout, "\tline %2d:", linfo.line_offset); + if (linfo.name[0]) + fprintf(stdout, " \"%s\"", linfo.name); + else + fprintf(stdout, " unnamed"); + if (linfo.consumer[0]) + fprintf(stdout, " \"%s\"", linfo.consumer); + else + fprintf(stdout, " unused"); + if (linfo.flags) { + fprintf(stdout, " ["); + print_flags(linfo.flags); + fprintf(stdout, "]"); + } + fprintf(stdout, "\n"); + + } + +exit_close_error: + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); + free(chrdev_name); + return ret; +} + +void print_usage(void) +{ + fprintf(stderr, "Usage: lsgpio [options]...\n" + "List GPIO chips, lines and states\n" + " -n <name> List GPIOs on a named device\n" + " -? This helptext\n" + ); +} + +int main(int argc, char **argv) +{ + const char *device_name; + int ret; + int c; + + while ((c = getopt(argc, argv, "n:")) != -1) { + switch (c) { + case 'n': + device_name = optarg; + break; + case '?': + print_usage(); + return -1; + } + } + + if (device_name) + ret = list_device(device_name); + else { + const struct dirent *ent; + DIR *dp; + + /* List all GPIO devices one at a time */ + dp = opendir("/dev"); + if (!dp) { + ret = -errno; + goto error_out; + } + + ret = -ENOENT; + while (ent = readdir(dp), ent) { + if (check_prefix(ent->d_name, "gpiochip")) { + ret = list_device(ent->d_name); + if (ret) + break; + } + } + + ret = 0; + if (closedir(dp) == -1) { + perror("scanning devices: Failed to close directory"); + ret = -errno; + } + } +error_out: + return ret; +} diff --git a/tools/hv/Makefile b/tools/hv/Makefile index a8ab79556926..a8c4644022a6 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -5,6 +5,8 @@ PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS) +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include + all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon %: %.c $(CC) $(CFLAGS) -o $@ $^ diff --git 
a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index 2218b9add4c1..494c9c615d1c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/__fls.h> +#include "../../../../include/asm-generic/bitops/__fls.h" diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index dbf711a28f71..0e4995fa0248 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/fls.h> +#include "../../../../include/asm-generic/bitops/fls.h" diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 980b1f63c047..35bee0071e78 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/fls64.h> +#include "../../../../include/asm-generic/bitops/fls64.h" diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h new file mode 100644 index 000000000000..c65cc0aa2659 --- /dev/null +++ b/tools/include/linux/hashtable.h @@ -0,0 +1,152 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin <levinsasha928@gmail.com> + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include <linux/list.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/hash.h> +#include <linux/log2.h> + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... 
((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. 
+ */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the 
hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h new file mode 100644 index 000000000000..841cec8ed525 --- /dev/null +++ b/tools/include/linux/stringify.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_STRINGIFY_H +#define __LINUX_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#endif /* !__LINUX_STRINGIFY_H */ diff --git a/tools/lib/api/Build b/tools/lib/api/Build index e8b8a23b9bf4..954c644f7ad9 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,3 +1,4 @@ libapi-y += fd/ libapi-y += fs/ libapi-y += cpu.o +libapi-y += debug.o diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index d85904dc9b38..316f308a63ea 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +CFLAGS += -I$(srctree)/tools/lib/api RM = rm -f diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h new file mode 100644 index 000000000000..188f7880eafe --- /dev/null +++ b/tools/lib/api/debug-internal.h @@ -0,0 +1,20 @@ +#ifndef __API_DEBUG_INTERNAL_H__ +#define __API_DEBUG_INTERNAL_H__ + +#include 
"debug.h" + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libapi: " fmt, ##__VA_ARGS__); \ +} while (0) + +extern libapi_print_fn_t __pr_warning; +extern libapi_print_fn_t __pr_info; +extern libapi_print_fn_t __pr_debug; + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +#endif /* __API_DEBUG_INTERNAL_H__ */ diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c new file mode 100644 index 000000000000..5fa5cf500a1f --- /dev/null +++ b/tools/lib/api/debug.c @@ -0,0 +1,28 @@ +#include <stdio.h> +#include <stdarg.h> +#include "debug.h" +#include "debug-internal.h" + +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +libapi_print_fn_t __pr_warning = __base_pr; +libapi_print_fn_t __pr_info = __base_pr; +libapi_print_fn_t __pr_debug; + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h new file mode 100644 index 000000000000..a0872f68fc56 --- /dev/null +++ b/tools/lib/api/debug.h @@ -0,0 +1,10 @@ +#ifndef __API_DEBUG_H__ +#define __API_DEBUG_H__ + +typedef int (*libapi_print_fn_t)(const char *, ...); + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug); + +#endif /* __API_DEBUG_H__ */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 459599d1b6c4..ef78c22ff44d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -13,6 +13,7 @@ #include <sys/mount.h> #include "fs.h" +#include "debug-internal.h" #define _STR(x) #x #define STR(x) _STR(x) @@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long 
*value) return err; } +#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + char sbuf[STRERR_BUFSIZE]; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", errno, + strerror_r(errno, sbuf, sizeof(sbuf))); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; + } else + free(bf); + + close(fd); + return err; +} + int sysfs__read_ull(const char *entry, unsigned long long *value) { char path[PATH_MAX]; @@ -326,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value) return filename__read_int(path, value); } +int sysfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_str(path, buf, sizep); +} + int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index d024a7f682f6..9f6598098dc5 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -2,6 +2,7 @@ #define __API_FS__ #include <stdbool.h> +#include <unistd.h> /* * On most systems <limits.h> would have given us this, but not on some systems @@ -26,8 +27,10 @@ FS(tracefs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int 
*value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_str(const char *entry, char **buf, size_t *sizep); #endif /* __API_FS__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8334a5a9d5d7..7e543c3102d4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -201,6 +201,7 @@ struct bpf_object { Elf_Data *data; } *reloc; int nr_reloc; + int maps_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -350,6 +351,7 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; + obj->efile.maps_shndx = -1; obj->loaded = false; @@ -529,12 +531,12 @@ bpf_object__init_maps(struct bpf_object *obj, void *data, } static int -bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) +bpf_object__init_maps_name(struct bpf_object *obj) { int i; Elf_Data *symbols = obj->efile.symbols; - if (!symbols || maps_shndx < 0) + if (!symbols || obj->efile.maps_shndx < 0) return -EINVAL; for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { @@ -544,7 +546,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) if (!gelf_getsym(symbols, i, &sym)) continue; - if (sym.st_shndx != maps_shndx) + if (sym.st_shndx != obj->efile.maps_shndx) continue; map_name = elf_strptr(obj->efile.elf, @@ -572,7 +574,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Scn *scn = NULL; - int idx = 0, err = 0, maps_shndx = -1; + int idx = 0, err = 0; /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { @@ -625,7 +627,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) else if (strcmp(name, "maps") == 0) { err = bpf_object__init_maps(obj, data->d_buf, data->d_size); - maps_shndx = idx; + obj->efile.maps_shndx = idx; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -674,8 +676,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (maps_shndx >= 0) - err = bpf_object__init_maps_name(obj, maps_shndx); + if (obj->efile.maps_shndx >= 0) + err = bpf_object__init_maps_name(obj); out: return err; } @@ -697,7 +699,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) static int bpf_program__collect_reloc(struct bpf_program *prog, size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols) + Elf_Data *data, Elf_Data *symbols, + int maps_shndx) { int i, nrels; @@ -724,9 +727,6 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { @@ -735,6 +735,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } + if (sym.st_shndx != maps_shndx) { + pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", + prog->section_name, sym.st_shndx); + return -LIBBPF_ERRNO__RELOC; + } + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -863,7 +872,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) err = bpf_program__collect_reloc(prog, nr_maps, 
shdr, data, - obj->efile.symbols); + obj->efile.symbols, + obj->efile.maps_shndx); if (err) return err; } diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 90d2baeb621a..1d57af56814b 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -100,7 +100,7 @@ include $(srctree)/tools/build/Makefile.include do_compile_shared_library = \ ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so)) + $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so)) do_build_static_lib = \ ($(print_static_lib_build) \ diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c index 9be663340f0a..d1c89cc06f5f 100644 --- a/tools/lib/lockdep/common.c +++ b/tools/lib/lockdep/common.c @@ -11,11 +11,6 @@ static __thread struct task_struct current_obj; bool debug_locks = true; bool debug_locks_silent; -__attribute__((constructor)) static void liblockdep_init(void) -{ - lockdep_init(); -} - __attribute__((destructor)) static void liblockdep_exit(void) { debug_check_no_locks_held(); diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h index a60c14b9662a..6e66277ec437 100644 --- a/tools/lib/lockdep/include/liblockdep/common.h +++ b/tools/lib/lockdep/include/liblockdep/common.h @@ -44,7 +44,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void lockdep_init(void); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c index f42b7e9aa48f..a0a2e3a266af 100644 --- a/tools/lib/lockdep/lockdep.c +++ b/tools/lib/lockdep/lockdep.c @@ -1,2 +1,8 @@ #include <linux/lockdep.h> + +/* Trivial API wrappers, we don't 
(yet) have RCU in user-space: */ +#define hlist_for_each_entry_rcu hlist_for_each_entry +#define hlist_add_head_rcu hlist_add_head +#define hlist_del_rcu hlist_del + #include "../../../kernel/locking/lockdep.c" diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 21cdf869a01b..52844847569c 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -439,7 +439,5 @@ __attribute__((constructor)) static void init_preload(void) ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); #endif - lockdep_init(); - __init_state = done; } diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh index 5334ad9d39b7..1069d96248c1 100755 --- a/tools/lib/lockdep/run_tests.sh +++ b/tools/lib/lockdep/run_tests.sh @@ -3,7 +3,7 @@ make &> /dev/null for i in `ls tests/*.c`; do - testname=$(basename -s .c "$i") + testname=$(basename "$i" .c) gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null echo -ne "$testname... " if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then @@ -11,11 +11,13 @@ for i in `ls tests/*.c`; do else echo "FAILED!" fi - rm tests/$testname + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi done for i in `ls tests/*.c`; do - testname=$(basename -s .c "$i") + testname=$(basename "$i" .c) gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null echo -ne "(PRELOAD) $testname... " if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then @@ -23,5 +25,7 @@ for i in `ls tests/*.c`; do else echo "FAILED!" 
fi - rm tests/$testname + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi done diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c index 0f782ff404ac..18211a5f354f 100644 --- a/tools/lib/lockdep/tests/AA.c +++ b/tools/lib/lockdep/tests/AA.c @@ -1,13 +1,13 @@ #include <liblockdep/mutex.h> -void main(void) +int main(void) { - pthread_mutex_t a, b; + pthread_mutex_t a; pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); pthread_mutex_lock(&a); - pthread_mutex_lock(&b); pthread_mutex_lock(&a); + + return 0; } diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c new file mode 100644 index 000000000000..0f782ff404ac --- /dev/null +++ b/tools/lib/lockdep/tests/ABA.c @@ -0,0 +1,13 @@ +#include <liblockdep/mutex.h> + +void main(void) +{ + pthread_mutex_t a, b; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_lock(&b); + pthread_mutex_lock(&a); +} diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c new file mode 100644 index 000000000000..cd807d736361 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA_2threads.c @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <pthread.h> + +pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER; +pthread_barrier_t bar; + +void *ba_lock(void *arg) +{ + int ret, i; + + pthread_mutex_lock(&b); + + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) + pthread_barrier_destroy(&bar); + + pthread_mutex_lock(&a); + + pthread_mutex_unlock(&a); + pthread_mutex_unlock(&b); +} + +int main(void) +{ + pthread_t t; + + pthread_barrier_init(&bar, NULL, 2); + + if (pthread_create(&t, NULL, ba_lock, NULL)) { + fprintf(stderr, "pthread_create() failed\n"); + return 1; + } + pthread_mutex_lock(&a); + + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) + pthread_barrier_destroy(&bar); + + pthread_mutex_lock(&b); + + 
pthread_mutex_unlock(&b); + pthread_mutex_unlock(&a); + + pthread_join(t, NULL); + + return 0; +} diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h index 6386dc3182a0..fd3e56a83fc2 100644 --- a/tools/lib/lockdep/uinclude/linux/compiler.h +++ b/tools/lib/lockdep/uinclude/linux/compiler.h @@ -3,6 +3,7 @@ #define __used __attribute__((__unused__)) #define unlikely +#define READ_ONCE(x) (x) #define WRITE_ONCE(x, val) x=(val) #define RCU_INIT_POINTER(p, v) p=(v) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 629cf8c14e68..a8103700c172 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -8,8 +8,10 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar +CC ?= $(CROSS_COMPILE)gcc +LD ?= $(CROSS_COMPILE)ld +AR ?= $(CROSS_COMPILE)ar + RM = rm -f MAKEFLAGS += --no-print-directory diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index c3bd294a63d1..a8b6357d1ffe 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1951,6 +1951,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) strcmp(token, "*") == 0 || strcmp(token, "^") == 0 || strcmp(token, "/") == 0 || + strcmp(token, "%") == 0 || strcmp(token, "<") == 0 || strcmp(token, ">") == 0 || strcmp(token, "<=") == 0 || @@ -2397,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val) break; *val = left + right; break; + case '~': + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = ~right; + break; default: do_warning("unknown op '%s'", arg->op.op); ret = 0; @@ -2634,6 
+2641,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_field: free_arg(arg->hex.field); + arg->hex.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -2658,8 +2666,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok) free_size: free_arg(arg->int_array.count); + arg->int_array.count = NULL; free_field: free_arg(arg->int_array.field); + arg->int_array.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -3689,6 +3699,9 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg case '/': val = left / right; break; + case '%': + val = left % right; + break; case '*': val = left * right; break; @@ -4971,7 +4984,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event break; } } - if (pevent->long_size == 8 && ls && + if (pevent->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; @@ -5335,41 +5348,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return false; } -void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record, bool use_trace_clock) +/** + * pevent_find_event_by_record - return the event from a given record + * @pevent: a handle to the pevent + * @record: The record to get the event from + * + * Returns the associated event for a given record, or NULL if none + * is found. 
+ */ +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record) { - static const char *spaces = " "; /* 20 spaces */ - struct event_format *event; - unsigned long secs; - unsigned long usecs; - unsigned long nsecs; - const char *comm; - void *data = record->data; int type; - int pid; - int len; - int p; - bool use_usec_format; - - use_usec_format = is_timestamp_in_us(pevent->trace_clock, - use_trace_clock); - if (use_usec_format) { - secs = record->ts / NSECS_PER_SEC; - nsecs = record->ts - secs * NSECS_PER_SEC; - } if (record->size < 0) { do_warning("ug! negative record size %d", record->size); - return; + return NULL; } - type = trace_parse_common_type(pevent, data); + type = trace_parse_common_type(pevent, record->data); - event = pevent_find_event(pevent, type); - if (!event) { - do_warning("ug! no event found for type %d", type); - return; - } + return pevent_find_event(pevent, type); +} + +/** + * pevent_print_event_task - Write the event task comm, pid and CPU + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the tasks comm, pid and CPU to @s. 
+ */ +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + void *data = record->data; + const char *comm; + int pid; pid = parse_common_pid(pevent, data); comm = find_cmdline(pevent, pid); @@ -5377,9 +5394,41 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, if (pevent->latency_format) { trace_seq_printf(s, "%8.8s-%-5d %3d", comm, pid, record->cpu); - pevent_data_lat_fmt(pevent, s, record); } else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); +} + +/** + * pevent_print_event_time - Write the event timestamp + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * + * Writes the timestamp of the record into @s. + */ +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock) +{ + unsigned long secs; + unsigned long usecs; + unsigned long nsecs; + int p; + bool use_usec_format; + + use_usec_format = is_timestamp_in_us(pevent->trace_clock, + use_trace_clock); + if (use_usec_format) { + secs = record->ts / NSECS_PER_SEC; + nsecs = record->ts - secs * NSECS_PER_SEC; + } + + if (pevent->latency_format) { + pevent_data_lat_fmt(pevent, s, record); + } if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { @@ -5387,14 +5436,36 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, p = 9; } else { usecs = (nsecs + 500) / NSECS_PER_USEC; + /* To avoid usecs larger than 1 sec */ + if (usecs >= 1000000) { + usecs -= 1000000; + secs++; + } p = 6; } - trace_seq_printf(s, " %5lu.%0*lu: %s: ", - secs, p, usecs, event->name); + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); } else - trace_seq_printf(s, " %12llu: %s: ", - record->ts, event->name); + 
trace_seq_printf(s, " %12llu:", record->ts); +} + +/** + * pevent_print_event_data - Write the event data section + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the parsing of the record's data to @s. + */ +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + static const char *spaces = " "; /* 20 spaces */ + int len; + + trace_seq_printf(s, " %s: ", event->name); /* Space out the event names evenly. */ len = strlen(event->name); @@ -5404,6 +5475,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, pevent_event_info(s, event, record); } +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record, bool use_trace_clock) +{ + struct event_format *event; + + event = pevent_find_event_by_record(pevent, record); + if (!event) { + do_warning("ug! 
no event found for type %d", + trace_parse_common_type(pevent, record->data)); + return; + } + + pevent_print_event_task(pevent, s, event, record); + pevent_print_event_time(pevent, s, event, record, use_trace_clock); + pevent_print_event_data(pevent, s, event, record); +} + static int events_id_cmp(const void *a, const void *b) { struct event_format * const * ea = a; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 706d9bc24066..9ffde377e89d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr); int pevent_pid_is_registered(struct pevent *pevent, int pid); +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock); +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); void pevent_print_event(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record, bool use_trace_clock); @@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id); struct event_format * pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record); + void pevent_data_lat_fmt(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record); int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); diff --git a/tools/net/bpf_dbg.c b/tools/net/bpf_dbg.c index 9a287bec695a..4f254bcc4423 100644 --- a/tools/net/bpf_dbg.c +++ b/tools/net/bpf_dbg.c @@ -129,16 +129,16 @@ struct bpf_regs { }; static struct 
sock_filter bpf_image[BPF_MAXINSNS + 1]; -static unsigned int bpf_prog_len = 0; +static unsigned int bpf_prog_len; static int bpf_breakpoints[64]; static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1]; static struct bpf_regs bpf_curr; -static unsigned int bpf_regs_len = 0; +static unsigned int bpf_regs_len; static int pcap_fd = -1; -static unsigned int pcap_packet = 0; -static size_t pcap_map_size = 0; +static unsigned int pcap_packet; +static size_t pcap_map_size; static char *pcap_ptr_va_start, *pcap_ptr_va_curr; static const char * const op_table[] = { @@ -1172,7 +1172,7 @@ static int cmd_breakpoint(char *subcmd) static int cmd_run(char *num) { - static uint32_t pass = 0, fail = 0; + static uint32_t pass, fail; bool has_limit = true; int pkts = 0, i = 0; diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 7cc72a336645..bd83149e7be0 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -23,6 +23,9 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> + +#include <linux/filter.h> #include "bpf_exp.yacc.h" @@ -79,22 +82,71 @@ extern void yyerror(const char *str); "txa" { return OP_TXA; } "#"?("len") { return K_PKT_LEN; } -"#"?("proto") { return K_PROTO; } -"#"?("type") { return K_TYPE; } -"#"?("poff") { return K_POFF; } -"#"?("ifidx") { return K_IFIDX; } -"#"?("nla") { return K_NLATTR; } -"#"?("nlan") { return K_NLATTR_NEST; } -"#"?("mark") { return K_MARK; } -"#"?("queue") { return K_QUEUE; } -"#"?("hatype") { return K_HATYPE; } -"#"?("rxhash") { return K_RXHASH; } -"#"?("cpu") { return K_CPU; } -"#"?("vlan_tci") { return K_VLAN_TCI; } -"#"?("vlan_pr") { return K_VLAN_AVAIL; } -"#"?("vlan_avail") { return K_VLAN_AVAIL; } -"#"?("vlan_tpid") { return K_VLAN_TPID; } -"#"?("rand") { return K_RAND; } + +"#"?("proto") { + yylval.number = SKF_AD_PROTOCOL; + return extension; + } +"#"?("type") { + yylval.number = SKF_AD_PKTTYPE; + return extension; + } +"#"?("poff") { + yylval.number = SKF_AD_PAY_OFFSET; + return extension; + } 
+"#"?("ifidx") { + yylval.number = SKF_AD_IFINDEX; + return extension; + } +"#"?("nla") { + yylval.number = SKF_AD_NLATTR; + return extension; + } +"#"?("nlan") { + yylval.number = SKF_AD_NLATTR_NEST; + return extension; + } +"#"?("mark") { + yylval.number = SKF_AD_MARK; + return extension; + } +"#"?("queue") { + yylval.number = SKF_AD_QUEUE; + return extension; + } +"#"?("hatype") { + yylval.number = SKF_AD_HATYPE; + return extension; + } +"#"?("rxhash") { + yylval.number = SKF_AD_RXHASH; + return extension; + } +"#"?("cpu") { + yylval.number = SKF_AD_CPU; + return extension; + } +"#"?("vlan_tci") { + yylval.number = SKF_AD_VLAN_TAG; + return extension; + } +"#"?("vlan_pr") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_avail") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_tpid") { + yylval.number = SKF_AD_VLAN_TPID; + return extension; + } +"#"?("rand") { + yylval.number = SKF_AD_RANDOM; + return extension; + } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e24eea1b0db5..56ba1de50784 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -35,6 +35,7 @@ enum jmp_type { JTL, JFL, JKL }; extern FILE *yyin; +extern int yylineno; extern int yylex(void); extern void yyerror(const char *str); @@ -55,14 +56,14 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI %token OP_LDXI -%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND +%token K_PKT_LEN %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' -%token number label +%token extension number label %type <label> label +%type <number> extension %type <number> number %% @@ -125,51 +126,9 @@ ldb bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); } | OP_LDB '[' number ']' { 
bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); } - | OP_LDB K_PROTO { + | OP_LDB extension { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDB K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDB K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDB K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDB K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDB K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDB K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDB K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDB K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDB K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDB K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDB K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDB K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDB K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDB K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldh @@ -179,51 +138,9 @@ ldh bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); } | OP_LDH '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); } - | OP_LDH K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - 
SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDH K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDH K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDH K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDH K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDH K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDH K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDH K_HATYPE { + | OP_LDH extension { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDH K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDH K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDH K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDH K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDH K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDH K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDH K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldi @@ -238,51 +155,9 @@ ld bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); } | OP_LD K_PKT_LEN { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); } - | OP_LD K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LD K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LD K_IFIDX { 
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LD K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LD K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LD K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LD K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LD K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LD K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LD K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LD K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LD K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LD K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LD K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LD K_VLAN_TPID { + | OP_LD extension { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { @@ -776,5 +651,6 @@ void bpf_asm_compile(FILE *fp, bool cstyle) void yyerror(const char *str) { + fprintf(stderr, "error: %s at line %d\n", str, yylineno); exit(1); } diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore new file mode 100644 index 000000000000..a0b3128bb31f --- /dev/null +++ b/tools/objtool/.gitignore @@ -0,0 +1,2 @@ +arch/x86/insn/inat-tables.c +objtool diff --git a/tools/objtool/Build b/tools/objtool/Build new file 
mode 100644 index 000000000000..0e89258a3541 --- /dev/null +++ b/tools/objtool/Build @@ -0,0 +1,13 @@ +objtool-y += arch/$(ARCH)/ +objtool-y += builtin-check.o +objtool-y += elf.o +objtool-y += special.o +objtool-y += objtool.o + +objtool-y += libstring.o + +CFLAGS += -I$(srctree)/tools/lib + +$(OUTPUT)libstring.o: ../lib/string.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt new file mode 100644 index 000000000000..5a95896105bc --- /dev/null +++ b/tools/objtool/Documentation/stack-validation.txt @@ -0,0 +1,342 @@ +Compile-time stack metadata validation +====================================== + + +Overview +-------- + +The kernel CONFIG_STACK_VALIDATION option enables a host tool named +objtool which runs at compile time. It has a "check" subcommand which +analyzes every .o file and ensures the validity of its stack metadata. +It enforces a set of rules on asm code and C inline assembly code so +that stack traces can be reliable. + +Currently it only checks frame pointer usage, but there are plans to add +CFI validation for C files and CFI generation for asm files. + +For each function, it recursively follows all possible code paths and +validates the correct frame pointer state at each instruction. + +It also follows code paths involving special sections, like +.altinstructions, __jump_table, and __ex_table, which can add +alternative execution paths to a given instruction (or set of +instructions). Similarly, it knows how to follow switch statements, for +which gcc sometimes uses jump tables. + + +Why do we need stack metadata validation? +----------------------------------------- + +Here are some of the benefits of validating stack metadata: + +a) More reliable stack traces for frame pointer enabled kernels + + Frame pointers are used for debugging purposes. 
They allow runtime + code and debug tools to be able to walk the stack to determine the + chain of function call sites that led to the currently executing + code. + + For some architectures, frame pointers are enabled by + CONFIG_FRAME_POINTER. For some other architectures they may be + required by the ABI (sometimes referred to as "backchain pointers"). + + For C code, gcc automatically generates instructions for setting up + frame pointers when the -fno-omit-frame-pointer option is used. + + But for asm code, the frame setup instructions have to be written by + hand, which most people don't do. So the end result is that + CONFIG_FRAME_POINTER is honored for C code but not for most asm code. + + For stack traces based on frame pointers to be reliable, all + functions which call other functions must first create a stack frame + and update the frame pointer. If a first function doesn't properly + create a stack frame before calling a second function, the *caller* + of the first function will be skipped on the stack trace. + + For example, consider the following example backtrace with frame + pointers enabled: + + [<ffffffff81812584>] dump_stack+0x4b/0x63 + [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30 + [<ffffffff8127f568>] seq_read+0x108/0x3e0 + [<ffffffff812cce62>] proc_reg_read+0x42/0x70 + [<ffffffff81256197>] __vfs_read+0x37/0x100 + [<ffffffff81256b16>] vfs_read+0x86/0x130 + [<ffffffff81257898>] SyS_read+0x58/0xd0 + [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76 + + It correctly shows that the caller of cmdline_proc_show() is + seq_read(). 
+ + If we remove the frame pointer logic from cmdline_proc_show() by + replacing the frame pointer related instructions with nops, here's + what it looks like instead: + + [<ffffffff81812584>] dump_stack+0x4b/0x63 + [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30 + [<ffffffff812cce62>] proc_reg_read+0x42/0x70 + [<ffffffff81256197>] __vfs_read+0x37/0x100 + [<ffffffff81256b16>] vfs_read+0x86/0x130 + [<ffffffff81257898>] SyS_read+0x58/0xd0 + [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76 + + Notice that cmdline_proc_show()'s caller, seq_read(), has been + skipped. Instead the stack trace seems to show that + cmdline_proc_show() was called by proc_reg_read(). + + The benefit of objtool here is that because it ensures that *all* + functions honor CONFIG_FRAME_POINTER, no functions will ever[*] be + skipped on a stack trace. + + [*] unless an interrupt or exception has occurred at the very + beginning of a function before the stack frame has been created, + or at the very end of the function after the stack frame has been + destroyed. This is an inherent limitation of frame pointers. + +b) 100% reliable stack traces for DWARF enabled kernels + + (NOTE: This is not yet implemented) + + As an alternative to frame pointers, DWARF Call Frame Information + (CFI) metadata can be used to walk the stack. Unlike frame pointers, + CFI metadata is out of band. So it doesn't affect runtime + performance and it can be reliable even when interrupts or exceptions + are involved. + + For C code, gcc automatically generates DWARF CFI metadata. But for + asm code, generating CFI is a tedious manual approach which requires + manually placed .cfi assembler macros to be scattered throughout the + code. It's clumsy and very easy to get wrong, and it makes the real + code harder to read. + + Objtool will improve this situation in several ways. For code + which already has CFI annotations, it will validate them. For code + which doesn't have CFI annotations, it will generate them. 
So an + architecture can opt to strip out all the manual .cfi annotations + from their asm code and have objtool generate them instead. + + We might also add a runtime stack validation debug option where we + periodically walk the stack from schedule() and/or an NMI to ensure + that the stack metadata is sane and that we reach the bottom of the + stack. + + So the benefit of objtool here will be that external tooling should + always show perfect stack traces. And the same will be true for + kernel warning/oops traces if the architecture has a runtime DWARF + unwinder. + +c) Higher live patching compatibility rate + + (NOTE: This is not yet implemented) + + Currently with CONFIG_LIVEPATCH there's a basic live patching + framework which is safe for roughly 85-90% of "security" fixes. But + patches can't have complex features like function dependency or + prototype changes, or data structure changes. + + There's a strong need to support patches which have the more complex + features so that the patch compatibility rate for security fixes can + eventually approach something resembling 100%. To achieve that, a + "consistency model" is needed, which allows tasks to be safely + transitioned from an unpatched state to a patched state. + + One of the key requirements of the currently proposed livepatch + consistency model [*] is that it needs to walk the stack of each + sleeping task to determine if it can be transitioned to the patched + state. If objtool can ensure that stack traces are reliable, this + consistency model can be used and the live patching compatibility + rate can be improved significantly. + + [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com + + +Rules +----- + +To achieve the validation, objtool enforces the following rules: + +1. Each callable function must be annotated as such with the ELF + function type. In asm code, this is typically done using the + ENTRY/ENDPROC macros. 
If objtool finds a return instruction + outside of a function, it flags an error since that usually indicates + callable code which should be annotated accordingly. + + This rule is needed so that objtool can properly identify each + callable function in order to analyze its stack metadata. + +2. Conversely, each section of code which is *not* callable should *not* + be annotated as an ELF function. The ENDPROC macro shouldn't be used + in this case. + + This rule is needed so that objtool can ignore non-callable code. + Such code doesn't have to follow any of the other rules. + +3. Each callable function which calls another function must have the + correct frame pointer logic, if required by CONFIG_FRAME_POINTER or + the architecture's back chain rules. This can be done in asm code + with the FRAME_BEGIN/FRAME_END macros. + + This rule ensures that frame pointer based stack traces will work as + designed. If function A doesn't create a stack frame before calling + function B, the _caller_ of function A will be skipped on the stack + trace. + +4. Dynamic jumps and jumps to undefined symbols are only allowed if: + + a) the jump is part of a switch statement; or + + b) the jump matches sibling call semantics and the frame pointer has + the same value it had on function entry. + + This rule is needed so that objtool can reliably analyze all of a + function's code paths. If a function jumps to code in another file, + and it's not a sibling call, objtool has no way to follow the jump + because it only analyzes a single file at a time. + +5. A callable function may not execute kernel entry/exit instructions. + The only code which needs such instructions is kernel entry code, + which shouldn't be in callable functions anyway. + + This rule is just a sanity check to ensure that callable functions + return normally. 
+ + +Errors in .S files +------------------ + +If you're getting an error in a compiled .S file which you don't +understand, first make sure that the affected code follows the above +rules. + +Here are some examples of common warnings reported by objtool, what +they mean, and suggestions for how to fix them. + + +1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup + + The func() function made a function call without first saving and/or + updating the frame pointer. + + If func() is indeed a callable function, add proper frame pointer + logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove + its ELF function annotation by changing ENDPROC to END. + + If you're getting this error in a .c file, see the "Errors in .c + files" section. + + +2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function + + A return instruction was detected, but objtool couldn't find a way + for a callable function to reach the instruction. + + If the return instruction is inside (or reachable from) a callable + function, the function needs to be annotated with the ENTRY/ENDPROC + macros. + + If you _really_ need a return instruction outside of a function, and + are 100% sure that it won't affect stack traces, you can tell + objtool to ignore it. See the "Adding exceptions" section below. + + +3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction + + The instruction lives inside of a callable function, but there's no + possible control flow path from the beginning of the function to the + instruction. + + If the instruction is actually needed, and it's actually in a + callable function, ensure that its function is properly annotated + with ENTRY/ENDPROC. + + If it's not actually in a callable function (e.g. kernel entry code), + change ENDPROC to END. + + +4. 
asm_file.o: warning: objtool: func(): can't find starting instruction + or + asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction + + Did you put data in a text section? If so, that can confuse + objtool's instruction decoder. Move the data to a more appropriate + section like .data or .rodata. + + +5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction + + This is a kernel entry/exit instruction like sysenter or sysret. + Such instructions aren't allowed in a callable function, and are most + likely part of the kernel entry code. + + If the instruction isn't actually in a callable function, change + ENDPROC to END. + + +6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer + + This is a dynamic jump or a jump to an undefined symbol. Objtool + assumed it's a sibling call and detected that the frame pointer + wasn't first restored to its original state. + + If it's not really a sibling call, you may need to move the + destination code to the local file. + + If the instruction is not actually in a callable function (e.g. + kernel entry code), change ENDPROC to END. + + +7. asm_file.o: warning: objtool: func()+0x5c: frame pointer state mismatch + + The instruction's frame pointer state is inconsistent, depending on + which execution path was taken to reach the instruction. + + Make sure the function pushes and sets up the frame pointer (for + x86_64, this means rbp) at the beginning of the function and pops it + at the end of the function. Also make sure that no other code in the + function touches the frame pointer. + + +Errors in .c files +------------------ + +If you're getting an objtool error in a compiled .c file, chances are +the file uses an asm() statement which has a "call" instruction. An +asm() statement with a call instruction must declare the use of the +stack pointer in its output operand.
For example, on x86_64: + + register void *__sp asm("rsp"); + asm volatile("call func" : "+r" (__sp)); + +Otherwise the stack frame may not get created before the call. + +Another possible cause for errors in C code is if the Makefile removes +-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. + +Also see the above section for .S file errors for more information about +what the individual error messages mean. + +If the error doesn't seem to make sense, it could be a bug in objtool. +Feel free to ask the objtool maintainer for help. + + +Adding exceptions +----------------- + +If you _really_ need objtool to ignore something, and are 100% sure +that it won't affect kernel stack traces, you can tell objtool to +ignore it: + +- To skip validation of a function, use the STACK_FRAME_NON_STANDARD + macro. + +- To skip validation of a file, add + + OBJECT_FILES_NON_STANDARD_filename.o := y + + to the Makefile. + +- To skip validation of a directory, add + + OBJECT_FILES_NON_STANDARD := y + + to the Makefile.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile new file mode 100644 index 000000000000..6765c7e949f3 --- /dev/null +++ b/tools/objtool/Makefile @@ -0,0 +1,65 @@ +include ../scripts/Makefile.include + +# Default ARCH to the host machine type when the caller did not set one. +# objtool's x86 code lives in arch/x86, so x86_64 is always mapped to x86. +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) +ARCH := x86 +endif + +# always use the host compiler +CC = gcc +LD = ld +AR = ar + +# Standalone build: take the directory two levels above this one as srctree. +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/ +LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(CURDIR)/) +LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a + +OBJTOOL := $(OUTPUT)objtool +OBJTOOL_IN := $(OBJTOOL)-in.o + +all: $(OBJTOOL) + +INCLUDES := -I$(srctree)/tools/include +CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) +LDFLAGS += -lelf $(LIBSUBCMD) + +AWK = awk +export srctree OUTPUT CFLAGS ARCH AWK +include $(srctree)/tools/build/Makefile.include + +$(OBJTOOL_IN): fixdep FORCE + @$(MAKE) $(build)=objtool + +# Warn (never fail) when the bundled x86 decoder copies differ from the kernel's. +$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) + @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \ + diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ + diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ + diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ + diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ + || echo "Warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true + $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ + + +$(LIBSUBCMD): fixdep FORCE + $(Q)$(MAKE) -C
$(SUBCMD_SRCDIR) OUTPUT=$(LIBSUBCMD_OUTPUT) + +clean: + $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) + $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep + +FORCE: + +.PHONY: all clean FORCE diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h new file mode 100644 index 000000000000..f7350fcedc70 --- /dev/null +++ b/tools/objtool/arch.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _ARCH_H +#define _ARCH_H + +#include <stdbool.h> +#include "elf.h" +/* Instruction classes stored in *type by arch_decode_instruction(). */ +#define INSN_FP_SAVE 1 /* frame pointer pushed */ +#define INSN_FP_SETUP 2 /* frame pointer loaded from stack pointer */ +#define INSN_FP_RESTORE 3 /* frame pointer popped (pop/leave) */ +#define INSN_JUMP_CONDITIONAL 4 +#define INSN_JUMP_UNCONDITIONAL 5 +#define INSN_JUMP_DYNAMIC 6 +#define INSN_CALL 7 +#define INSN_CALL_DYNAMIC 8 +#define INSN_RETURN 9 +#define INSN_CONTEXT_SWITCH 10 /* kernel entry/exit style instruction */ +#define INSN_BUG 11 +#define INSN_NOP 12 +#define INSN_OTHER 13 /* anything not classified above */ +#define INSN_LAST INSN_OTHER /* highest valid class value */ +/* NOTE(review): the x86 definition names the last parameter "immediate"; presumably the same value -- confirm. */ +int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, + unsigned long *displacement); + +#endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build new file mode 100644 index 000000000000..debbdb0b5c43 --- /dev/null +++ b/tools/objtool/arch/x86/Build @@ -0,0 +1,12 @@ +objtool-y += decode.o + +inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk +inat_tables_maps = arch/x86/insn/x86-opcode-map.txt + +$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c + +CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c new file mode 100644 index 000000000000..c0c0b265e88e --- /dev/null +++ b/tools/objtool/arch/x86/decode.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> + +#define unlikely(cond) (cond) +#include "insn/insn.h" +#include "insn/inat.c" +#include "insn/insn.c" + +#include "../../elf.h" +#include "../../arch.h" +#include "../../warn.h" +/* Return 1 for an x86-64 object, 0 for i386, or -1 (after a warning) for any other ELF machine type. */ +static int is_x86_64(struct elf *elf) +{ + switch (elf->ehdr.e_machine) { + case EM_X86_64: + return 1; + case EM_386: + return 0; + default: + WARN("unexpected ELF machine type %d", elf->ehdr.e_machine); + return -1; + } +} +/* Decode the instruction at sec->data + offset (at most maxlen bytes): store its length in *len, its INSN_* class in *type and its immediate (0 if none) in *immediate. Returns 0 on success, -1 if the machine type or the instruction can't be decoded. VEX-prefixed instructions are reported as INSN_OTHER and leave *immediate untouched. */ +int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, + unsigned long *immediate) +{ + struct insn insn; + int x86_64; + unsigned char op1, op2, ext; + + x86_64 = is_x86_64(elf); + if (x86_64 == -1) + return -1; + + insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64); + insn_get_length(&insn); + insn_get_opcode(&insn); + insn_get_modrm(&insn); + insn_get_immediate(&insn); + + if (!insn_complete(&insn)) { + WARN_FUNC("can't decode instruction", sec, offset); + return -1; + } + + *len = insn.length; + *type = INSN_OTHER; + + if (insn.vex_prefix.nbytes) + return 0; + + op1 = insn.opcode.bytes[0]; + op2 = insn.opcode.bytes[1]; + + switch (op1) { + case 0x55: + if (!insn.rex_prefix.nbytes) + /* push rbp */ + *type = INSN_FP_SAVE; + break; + + case 0x5d: + if (!insn.rex_prefix.nbytes) + /* pop rbp */ + *type = INSN_FP_RESTORE; + break; + + case 0x70 ...
0x7f: + *type = INSN_JUMP_CONDITIONAL; + break; + + case 0x89: + if (insn.rex_prefix.nbytes == 1 && + insn.rex_prefix.bytes[0] == 0x48 && + insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5) + /* mov rsp, rbp */ + *type = INSN_FP_SETUP; + break; + + case 0x90: + *type = INSN_NOP; + break; + + case 0x0f: /* two-byte opcodes: classify on the second opcode byte */ + if (op2 >= 0x80 && op2 <= 0x8f) + *type = INSN_JUMP_CONDITIONAL; + else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || + op2 == 0x35) + /* syscall, sysret, sysenter, sysexit */ + *type = INSN_CONTEXT_SWITCH; + else if (op2 == 0x0b || op2 == 0xb9) + /* ud2 */ + *type = INSN_BUG; + else if (op2 == 0x0d || op2 == 0x1f) + /* nopl/nopw */ + *type = INSN_NOP; + else if (op2 == 0x01 && insn.modrm.nbytes && + (insn.modrm.bytes[0] == 0xc2 || + insn.modrm.bytes[0] == 0xd8)) + /* vmlaunch, vmrun */ + *type = INSN_CONTEXT_SWITCH; + + break; + + case 0xc9: /* leave */ + *type = INSN_FP_RESTORE; + break; + + case 0xe3: /* jecxz/jrcxz */ + *type = INSN_JUMP_CONDITIONAL; + break; + + case 0xe9: + case 0xeb: + *type = INSN_JUMP_UNCONDITIONAL; + break; + + case 0xc2: + case 0xc3: + *type = INSN_RETURN; + break; + + case 0xcf: /* iret (0xc5 is lds or the 2-byte VEX prefix, not iret) */ + case 0xca: /* retf */ + case 0xcb: /* retf */ + *type = INSN_CONTEXT_SWITCH; + break; + + case 0xe8: + *type = INSN_CALL; + break; + + case 0xff: /* group opcode: the ModRM reg field selects the operation */ + ext = X86_MODRM_REG(insn.modrm.bytes[0]); + if (ext == 2 || ext == 3) + *type = INSN_CALL_DYNAMIC; + else if (ext == 4) + *type = INSN_JUMP_DYNAMIC; + else if (ext == 5) /* jmpf */ + *type = INSN_CONTEXT_SWITCH; + + break; + + default: + break; + } + + *immediate = insn.immediate.nbytes ?
insn.immediate.value : 0; + + return 0; +} diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..093a892026f9 --- /dev/null +++ b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk @@ -0,0 +1,387 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code.
*/\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = 
"INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid 
>= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, 
opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = 
"etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} + diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/insn/inat.c new file mode 100644 index 000000000000..e4bf28e6f4c7 --- /dev/null +++ b/tools/objtool/arch/x86/insn/inat.c @@ -0,0 +1,97 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + */ +#include "insn.h" + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} + diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h new file mode 100644 index 000000000000..611645e903a8 --- /dev/null +++ 
b/tools/objtool/arch/x86/insn/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "inat_types.h" + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. 
+ */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define 
INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static 
inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/insn/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/objtool/arch/x86/insn/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c new file mode 100644 index 000000000000..9f26eae6c9f0 --- /dev/null +++ b/tools/objtool/arch/x86/insn/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include "inat.h" +#include "insn.h" + +/* + * Bounds-checked fetch helpers. + * + * validate_next(t, insn, n): true if the sizeof(t) bytes starting n bytes + * past next_byte end at or before end_kaddr. + * get_next(), peek_nbyte_next() and peek_next() run that check first and + * jump to the err_out label when it fails, so every function using them + * must define err_out. get_next() advances next_byte; the peek variants + * leave it unchanged. + */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: number of bytes readable at @kaddr; values larger than + * MAX_INSN_SIZE are clamped to MAX_INSN_SIZE + * @x86_64: !0 for 64-bit kernel or 64-bit app + * + * Zeroes @insn, records the buffer bounds and sets the default sizes: + * opnd_bytes = 4, addr_bytes = 8 if @x86_64 is set and 4 otherwise. + * NOTE(review): a negative @buf_len is not rejected here. + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; /* void-pointer arithmetic is a GNU C extension */ + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set.
+ */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Already recorded: do not store it again, only count it */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* A fifth distinct legacy prefix: invalid, stop scanning */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* + * XOR toggles the address size: 4 <-> 8 in + * 64-bit mode (^ 12), 2 <-> 4 otherwise (^ 6). + */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* operand size toggles between 2 and 4 (^ 6) */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; /* counts repeated prefixes too, so it can exceed nb */ + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* + * Record the last prefix seen in bytes[3]. If bytes[3] already holds + * a different prefix, that one is moved into the slot lb occupied. + */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix (only looked for when decoding 64-bit code) */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bit mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES, not a VEX prefix.
+ */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + +err_out: /* jumped to directly only when a fetch failed; prefixes->got is then still 0 */ + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1.
+ */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* With a VEX prefix the attribute comes from inat_get_avx_attribute() */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* VEX given, but the opcode neither accepts VEX nor is a group: invalid */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Escape: fetch the next opcode byte and look up its attribute */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* VEX-only opcode reached without a VEX prefix: invalid */ +end: + opcode->got = 1; + +err_out: /* jumped to directly only when a fetch failed; opcode->got is then still 0 */ + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { /* group opcode: the ModRM byte selects the final attribute */ + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* VEX given, but this group member does not accept VEX: invalid */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: /* jumped to directly only when a fetch failed; modrm->got is then still 0 */ + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. Returns 0 without decoding anything if @insn->x86_64 is 0. + * + * Return: non-zero if a ModRM byte is present with mod == 0 and r/m == 5, + * 0 otherwise. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte.
+ */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { /* no SIB with 16-bit addressing; otherwise r/m == 100b with mod != 11b selects it */ + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: /* jumped to directly only when a fetch failed; sib.got is then still 0 */ + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * The displacement value is sign-extended. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); /* sib.value is still 0 if no SIB byte was read */ + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(signed char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: /* jumped to directly only when a fetch failed; displacement.got is then still 0 */ + return; +} + +/* + * Decode moffset16/32/64, sized by addr_bytes. A 64-bit offset is read as + * two 32-bit values, moffset1 first and then moffset2. Return 0 if failed. + */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* addr_bytes is not 2, 4 or 8 (it must have been modified manually) */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz).
Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: /* still a 32-bit immediate when the operand size is 8 */ + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes is not 2, 4 or 8 (it must have been modified manually) */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* + * Decode imm v64(Iv/Ov). An 8-byte value is read as two 32-bit values, + * immediate1 first and then immediate2. Return 0 if failed. + */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes is not 2, 4 or 8 (it must have been modified manually) */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* + * Decode ptr16:16/32(Ap): the offset goes to immediate1, the following + * 16-bit value to immediate2. Return 0 if failed. + */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 does not exist (no segment) */ + return 0; + default: /* opnd_bytes is not 2, 4 or 8 (it must have been modified manually) */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Most immediates are sign-extended.
The unsigned value can be + * obtained by masking with ((1ULL << (nbytes * 8)) - 1); a plain int + * shift would overflow when nbytes is 4. + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(signed char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: /* read as two 32-bit values: immediate1, then immediate2 */ + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* attr says there is an immediate, but its size code is not handled */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(signed char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: /* jumped to directly only on failure; immediate.got is then still 0 */ + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes.
+ */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) /* already computed */ + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); /* bytes consumed from kaddr so far, even if decoding stopped early */ +} diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/objtool/arch/x86/insn/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include "inat.h" + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; /* number of instruction bytes taken by this field */ +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + * prefixes.nbytes counts every prefix + * byte consumed, repeats included + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; /* attribute flags, set by insn_get_opcode() and insn_get_modrm() */ + unsigned char opnd_bytes; /* operand size in bytes */ + unsigned char addr_bytes; /* address size in bytes */ + unsigned char length; /* set by insn_get_length(); 0 until then */ + unsigned char x86_64; /* 1 when decoding 64-bit code, else 0 */ + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* end of the input buffer (kaddr + buf_len), one past the last readable byte */ + const insn_byte_t *next_byte; /* next byte to be decoded */ +}; + +#define MAX_INSN_SIZE 15 /* longest valid instruction, in bytes */ + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex)
& 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text; 64-bit or 32-bit decoding is picked at build time by CONFIG_X86_64 */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) /* non-zero if a VEX prefix was decoded; decodes the prefixes first if needed */ +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Return non-zero if every field from the opcode through the immediate has been decoded */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) /* VEX m field: the constant X86_VEX2_M for 2-byte VEX, else taken from byte 1 */ +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t
insn_vex_p_bits(struct insn *insn) /* VEX pp field: from byte 1 for 2-byte VEX, else from byte 2 */ +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix; 0 if there is neither */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* + * Offset of each field from kaddr. Each helper adds the nbytes of the + * preceding field to that field's offset, so a result is only meaningful + * once all preceding fields have been decoded. + */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt new file mode 100644 index 000000000000..d388de72eaca --- /dev/null +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -0,0 +1,984 @@ +# x86 Opcode Maps +# +# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 
0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG 
rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. 
+e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +18: Grp16 (1A) +19: +# Intel SDM opcode map does not list MPX instructions. For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte. 
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | 
vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
+77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey +7a: +7b: +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +# 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) +86: JBE/JNA Jz (f64) +87: JA/JNBE Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | 
vshufpd Vpd,Hpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +# 0x0f 0xf0-0xff +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq 
Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: vcvtph2ps Vx,Wx,Ib (66),(v) +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) +1b: +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +1f: +# 0x0f 0x38 0x20-0x2f +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq 
Vx,Ux/Mq (66),(v1) +26: +27: +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) +# 0x0f 0x38 0x30-0x3f +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) +# 0x0f 0x38 0x40-0x8f +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +# 0x0f 0x38 0x90-0xbf (FMA) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: 
vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +# 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: 
vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Mp +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: rdpkru (110),(11B) | wrpkru (111),(11B) +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +EndTable + +GrpTable: Grp10 +EndTable + +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp15 +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +4: XSAVE +5: XRSTOR | lfence (11B) +6: 
XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c new file mode 100644 index 000000000000..7515cb2e879a --- /dev/null +++ b/tools/objtool/builtin-check.c @@ -0,0 +1,1159 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * objtool check: + * + * This command analyzes every .o file and ensures the validity of its stack + * trace metadata. It enforces a set of rules on asm code and C inline + * assembly code so that stack traces can be reliable. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. 
+ */ + +#include <string.h> +#include <subcmd/parse-options.h> + +#include "builtin.h" +#include "elf.h" +#include "special.h" +#include "arch.h" +#include "warn.h" + +#include <linux/hashtable.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define STATE_FP_SAVED 0x1 +#define STATE_FP_SETUP 0x2 +#define STATE_FENTRY 0x4 + +struct instruction { + struct list_head list; + struct hlist_node hash; + struct section *sec; + unsigned long offset; + unsigned int len, state; + unsigned char type; + unsigned long immediate; + bool alt_group, visited; + struct symbol *call_dest; + struct instruction *jump_dest; + struct list_head alts; +}; + +struct alternative { + struct list_head list; + struct instruction *insn; +}; + +struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 16); + struct section *rodata, *whitelist; +}; + +const char *objname; +static bool nofp; + +static struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset) +{ + struct instruction *insn; + + hash_for_each_possible(file->insn_hash, insn, hash, offset) + if (insn->sec == sec && insn->offset == offset) + return insn; + + return NULL; +} + +static struct instruction *next_insn_same_sec(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + + if (&next->list == &file->insn_list || next->sec != insn->sec) + return NULL; + + return next; +} + +#define for_each_insn(file, insn) \ + list_for_each_entry(insn, &file->insn_list, list) + +#define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn && &insn->list != &file->insn_list && \ + insn->sec == func->sec && \ + insn->offset < func->offset + func->len; \ + insn = list_next_entry(insn, list)) + +#define sec_for_each_insn_from(file, insn) \ + for (; insn; insn = next_insn_same_sec(file, insn)) + + +/* + * Check if the function has been manually 
whitelisted with the + * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted + * due to its use of a context switching instruction. + */ +static bool ignore_func(struct objtool_file *file, struct symbol *func) +{ + struct rela *rela; + struct instruction *insn; + + /* check for STACK_FRAME_NON_STANDARD */ + if (file->whitelist && file->whitelist->rela) + list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) + if (rela->sym->sec == func->sec && + rela->addend == func->offset) + return true; + + /* check if it has a context switching instruction */ + func_for_each_insn(file, func, insn) + if (insn->type == INSN_CONTEXT_SWITCH) + return true; + + return false; +} + +/* + * This checks to see if the given function is a "noreturn" function. + * + * For global functions which are outside the scope of this object file, we + * have to keep a manual list of them. + * + * For local functions, we have to detect them manually by simply looking for + * the lack of a return instruction. + * + * Returns: + * -1: error + * 0: no dead end + * 1: dead end + */ +static int __dead_end_function(struct objtool_file *file, struct symbol *func, + int recursion) +{ + int i; + struct instruction *insn; + bool empty = true; + + /* + * Unfortunately these have to be hard coded because the noreturn + * attribute isn't provided in ELF data. 
+ */ + static const char * const global_noreturns[] = { + "__stack_chk_fail", + "panic", + "do_exit", + "__module_put_and_exit", + "complete_and_exit", + "kvm_spurious_fault", + "__reiserfs_panic", + "lbug_with_loc" + }; + + if (func->bind == STB_WEAK) + return 0; + + if (func->bind == STB_GLOBAL) + for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) + if (!strcmp(func->name, global_noreturns[i])) + return 1; + + if (!func->sec) + return 0; + + func_for_each_insn(file, func, insn) { + empty = false; + + if (insn->type == INSN_RETURN) + return 0; + } + + if (empty) + return 0; + + /* + * A function can have a sibling call instead of a return. In that + * case, the function's dead-end status depends on whether the target + * of the sibling call returns. + */ + func_for_each_insn(file, func, insn) { + if (insn->sec != func->sec || + insn->offset >= func->offset + func->len) + break; + + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + struct instruction *dest = insn->jump_dest; + struct symbol *dest_func; + + if (!dest) + /* sibling call to another file */ + return 0; + + if (dest->sec != func->sec || + dest->offset < func->offset || + dest->offset >= func->offset + func->len) { + /* local sibling call */ + dest_func = find_symbol_by_offset(dest->sec, + dest->offset); + if (!dest_func) + continue; + + if (recursion == 5) { + WARN_FUNC("infinite recursion (objtool bug!)", + dest->sec, dest->offset); + return -1; + } + + return __dead_end_function(file, dest_func, + recursion + 1); + } + } + + if (insn->type == INSN_JUMP_DYNAMIC) + /* sibling call */ + return 0; + } + + return 1; +} + +static int dead_end_function(struct objtool_file *file, struct symbol *func) +{ + return __dead_end_function(file, func, 0); +} + +/* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. 
+ */ +static int decode_instructions(struct objtool_file *file) +{ + struct section *sec; + unsigned long offset; + struct instruction *insn; + int ret; + + list_for_each_entry(sec, &file->elf->sections, list) { + + if (!(sec->sh.sh_flags & SHF_EXECINSTR)) + continue; + + for (offset = 0; offset < sec->len; offset += insn->len) { + insn = malloc(sizeof(*insn)); + memset(insn, 0, sizeof(*insn)); + + INIT_LIST_HEAD(&insn->alts); + insn->sec = sec; + insn->offset = offset; + + ret = arch_decode_instruction(file->elf, sec, offset, + sec->len - offset, + &insn->len, &insn->type, + &insn->immediate); + if (ret) + return ret; + + if (!insn->type || insn->type > INSN_LAST) { + WARN_FUNC("invalid instruction type %d", + insn->sec, insn->offset, insn->type); + return -1; + } + + hash_add(file->insn_hash, &insn->hash, insn->offset); + list_add_tail(&insn->list, &file->insn_list); + } + } + + return 0; +} + +/* + * Warnings shouldn't be reported for ignored functions. + */ +static void add_ignores(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + struct symbol *func; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!ignore_func(file, func)) + continue; + + func_for_each_insn(file, func, insn) + insn->visited = true; + } + } +} + +/* + * Find the destination instructions for all jumps. 
+ */ +static int add_jump_destinations(struct objtool_file *file) +{ + struct instruction *insn; + struct rela *rela; + struct section *dest_sec; + unsigned long dest_off; + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + /* skip ignores */ + if (insn->visited) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_sec = insn->sec; + dest_off = insn->offset + insn->len + insn->immediate; + } else if (rela->sym->type == STT_SECTION) { + dest_sec = rela->sym->sec; + dest_off = rela->addend + 4; + } else if (rela->sym->sec->idx) { + dest_sec = rela->sym->sec; + dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else { + /* sibling call */ + insn->jump_dest = 0; + continue; + } + + insn->jump_dest = find_insn(file, dest_sec, dest_off); + if (!insn->jump_dest) { + + /* + * This is a special case where an alt instruction + * jumps past the end of the section. These are + * handled later in handle_group_alt(). + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); + return -1; + } + } + + return 0; +} + +/* + * Find the destination instructions for all calls. 
+ */ +static int add_call_destinations(struct objtool_file *file) +{ + struct instruction *insn; + unsigned long dest_off; + struct rela *rela; + + for_each_insn(file, insn) { + if (insn->type != INSN_CALL) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_off = insn->offset + insn->len + insn->immediate; + insn->call_dest = find_symbol_by_offset(insn->sec, + dest_off); + if (!insn->call_dest) { + WARN_FUNC("can't find call dest symbol at offset 0x%lx", + insn->sec, insn->offset, dest_off); + return -1; + } + } else if (rela->sym->type == STT_SECTION) { + insn->call_dest = find_symbol_by_offset(rela->sym->sec, + rela->addend+4); + if (!insn->call_dest || + insn->call_dest->type != STT_FUNC) { + WARN_FUNC("can't find call dest symbol at %s+0x%x", + insn->sec, insn->offset, + rela->sym->sec->name, + rela->addend + 4); + return -1; + } + } else + insn->call_dest = rela->sym; + } + + return 0; +} + +/* + * The .alternatives section requires some extra special care, over and above + * what other special sections require: + * + * 1. Because alternatives are patched in-place, we need to insert a fake jump + * instruction at the end so that validate_branch() skips all the original + * replaced instructions when validating the new instruction path. + * + * 2. An added wrinkle is that the new instruction length might be zero. In + * that case the old instructions are replaced with noops. We simulate that + * by creating a fake jump as the only new instruction. + * + * 3. In some cases, the alternative section includes an instruction which + * conditionally jumps to the _end_ of the entry. We have to modify these + * jumps' destinations to point back to .text rather than the end of the + * entry in .altinstr_replacement. + * + * 4. It has been requested that we don't validate the !POPCNT feature path + * which is a "very very small percentage of machines". 
+ */ +static int handle_group_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + unsigned long dest_off; + + last_orig_insn = NULL; + insn = orig_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->orig_off + special_alt->orig_len) + break; + + if (special_alt->skip_orig) + insn->type = INSN_NOP; + + insn->alt_group = true; + last_orig_insn = insn; + } + + if (!next_insn_same_sec(file, last_orig_insn)) { + WARN("%s: don't know how to handle alternatives at end of section", + special_alt->orig_sec->name); + return -1; + } + + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + + if (!special_alt->new_len) { + *new_insn = fake_jump; + return 0; + } + + last_new_insn = NULL; + insn = *new_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->new_off + special_alt->new_len) + break; + + last_new_insn = insn; + + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + if (!insn->immediate) + continue; + + dest_off = insn->offset + insn->len + insn->immediate; + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = fake_jump; + + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", + insn->sec, insn->offset); + return -1; + } + } + + if (!last_new_insn) { + WARN_FUNC("can't find last new alternative instruction", + special_alt->new_sec, special_alt->new_off); + return -1; + } + + list_add(&fake_jump->list, &last_new_insn->list); + + return 0; +} + +/* + * A jump 
table entry can either convert a nop to a jump or a jump to a nop. + * If the original instruction is a jump, make the alt entry an effective nop + * by just skipping the original instruction. + */ +static int handle_jump_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + if (orig_insn->type == INSN_NOP) + return 0; + + if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { + WARN_FUNC("unsupported instruction at jump label", + orig_insn->sec, orig_insn->offset); + return -1; + } + + *new_insn = list_next_entry(orig_insn, list); + return 0; +} + +/* + * Read all the special sections which have alternate instructions which can be + * patched in or redirected to at runtime. Each instruction having alternate + * instruction(s) has them added to its insn->alts list, which will be + * traversed in validate_branch(). + */ +static int add_special_section_alts(struct objtool_file *file) +{ + struct list_head special_alts; + struct instruction *orig_insn, *new_insn; + struct special_alt *special_alt, *tmp; + struct alternative *alt; + int ret; + + ret = special_get_alts(file->elf, &special_alts); + if (ret) + return ret; + + list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + + orig_insn = find_insn(file, special_alt->orig_sec, + special_alt->orig_off); + if (!orig_insn) { + WARN_FUNC("special: can't find orig instruction", + special_alt->orig_sec, special_alt->orig_off); + ret = -1; + goto out; + } + + new_insn = NULL; + if (!special_alt->group || special_alt->new_len) { + new_insn = find_insn(file, special_alt->new_sec, + special_alt->new_off); + if (!new_insn) { + WARN_FUNC("special: can't find new instruction", + special_alt->new_sec, + special_alt->new_off); + ret = -1; + goto out; + } + } + + if (special_alt->group) { + ret = handle_group_alt(file, special_alt, orig_insn, + 
&new_insn); + if (ret) + goto out; + } else if (special_alt->jump_or_nop) { + ret = handle_jump_alt(file, special_alt, orig_insn, + &new_insn); + if (ret) + goto out; + } + + alt->insn = new_insn; + list_add_tail(&alt->list, &orig_insn->alts); + + list_del(&special_alt->list); + free(special_alt); + } + +out: + return ret; +} + +static int add_switch_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn, struct rela *table, + struct rela *next_table) +{ + struct rela *rela = table; + struct instruction *alt_insn; + struct alternative *alt; + + list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { + if (rela == next_table) + break; + + if (rela->sym->sec != insn->sec || + rela->addend <= func->offset || + rela->addend >= func->offset + func->len) + break; + + alt_insn = find_insn(file, insn->sec, rela->addend); + if (!alt_insn) { + WARN("%s: can't find instruction at %s+0x%x", + file->rodata->rela->name, insn->sec->name, + rela->addend); + return -1; + } + + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + + alt->insn = alt_insn; + list_add_tail(&alt->list, &insn->alts); + } + + return 0; +} + +static int add_func_switch_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn, *prev_jump; + struct rela *text_rela, *rodata_rela, *prev_rela; + int ret; + + prev_jump = NULL; + + func_for_each_insn(file, func, insn) { + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* common case: jmpq *[addr](,%rax,8) */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend); + + /* + * TODO: Document where this is needed, or get rid of it. 
+ * + * rare case: jmpq *[addr](%rip) + */ + if (!rodata_rela) + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend + 4); + + if (!rodata_rela) + continue; + + /* + * We found a switch table, but we don't know yet how big it + * is. Don't add it until we reach the end of the function or + * the beginning of another switch table in the same function. + */ + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, + rodata_rela); + if (ret) + return ret; + } + + prev_jump = insn; + prev_rela = rodata_rela; + } + + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + if (ret) + return ret; + } + + return 0; +} + +/* + * For some switch statements, gcc generates a jump table in the .rodata + * section which contains a list of addresses within the function to jump to. + * This finds these jump tables and adds them to the insn->alts lists. + */ +static int add_switch_table_alts(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + int ret; + + if (!file->rodata || !file->rodata->rela) + return 0; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + ret = add_func_switch_tables(file, func); + if (ret) + return ret; + } + } + + return 0; +} + +static int decode_sections(struct objtool_file *file) +{ + int ret; + + file->whitelist = find_section_by_name(file->elf, "__func_stack_frame_non_standard"); + file->rodata = find_section_by_name(file->elf, ".rodata"); + + ret = decode_instructions(file); + if (ret) + return ret; + + add_ignores(file); + + ret = add_jump_destinations(file); + if (ret) + return ret; + + ret = add_call_destinations(file); + if (ret) + return ret; + + ret = add_special_section_alts(file); + if (ret) + return ret; + + ret = add_switch_table_alts(file); + if (ret) + return ret; + + return 0; +} + +static bool is_fentry_call(struct instruction *insn) +{ + if 
(insn->type == INSN_CALL && + insn->call_dest->type == STT_NOTYPE && + !strcmp(insn->call_dest->name, "__fentry__")) + return true; + + return false; +} + +static bool has_modified_stack_frame(struct instruction *insn) +{ + return (insn->state & STATE_FP_SAVED) || + (insn->state & STATE_FP_SETUP); +} + +static bool has_valid_stack_frame(struct instruction *insn) +{ + return (insn->state & STATE_FP_SAVED) && + (insn->state & STATE_FP_SETUP); +} + +static unsigned int frame_state(unsigned long state) +{ + return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); +} + +/* + * Follow the branch starting at the given instruction, and recursively follow + * any other branches (jumps). Meanwhile, track the frame pointer state at + * each instruction and validate all the rules described in + * tools/objtool/Documentation/stack-validation.txt. + */ +static int validate_branch(struct objtool_file *file, + struct instruction *first, unsigned char first_state) +{ + struct alternative *alt; + struct instruction *insn; + struct section *sec; + unsigned char state; + int ret; + + insn = first; + sec = insn->sec; + state = first_state; + + if (insn->alt_group && list_empty(&insn->alts)) { + WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", + sec, insn->offset); + return 1; + } + + while (1) { + if (insn->visited) { + if (frame_state(insn->state) != frame_state(state)) { + WARN_FUNC("frame pointer state mismatch", + sec, insn->offset); + return 1; + } + + return 0; + } + + /* + * Catch a rare case where a noreturn function falls through to + * the next function. 
+ */ + if (is_fentry_call(insn) && (state & STATE_FENTRY)) + return 0; + + insn->visited = true; + insn->state = state; + + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } + + switch (insn->type) { + + case INSN_FP_SAVE: + if (!nofp) { + if (state & STATE_FP_SAVED) { + WARN_FUNC("duplicate frame pointer save", + sec, insn->offset); + return 1; + } + state |= STATE_FP_SAVED; + } + break; + + case INSN_FP_SETUP: + if (!nofp) { + if (state & STATE_FP_SETUP) { + WARN_FUNC("duplicate frame pointer setup", + sec, insn->offset); + return 1; + } + state |= STATE_FP_SETUP; + } + break; + + case INSN_FP_RESTORE: + if (!nofp) { + if (has_valid_stack_frame(insn)) + state &= ~STATE_FP_SETUP; + + state &= ~STATE_FP_SAVED; + } + break; + + case INSN_RETURN: + if (!nofp && has_modified_stack_frame(insn)) { + WARN_FUNC("return without frame pointer restore", + sec, insn->offset); + return 1; + } + return 0; + + case INSN_CALL: + if (is_fentry_call(insn)) { + state |= STATE_FENTRY; + break; + } + + ret = dead_end_function(file, insn->call_dest); + if (ret == 1) + return 0; + if (ret == -1) + return 1; + + /* fallthrough */ + case INSN_CALL_DYNAMIC: + if (!nofp && !has_valid_stack_frame(insn)) { + WARN_FUNC("call without frame pointer save/setup", + sec, insn->offset); + return 1; + } + break; + + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + if (insn->jump_dest) { + ret = validate_branch(file, insn->jump_dest, + state); + if (ret) + return 1; + } else if (has_modified_stack_frame(insn)) { + WARN_FUNC("sibling call from callable instruction with changed frame pointer", + sec, insn->offset); + return 1; + } /* else it's a sibling call */ + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + + case INSN_JUMP_DYNAMIC: + if (list_empty(&insn->alts) && + has_modified_stack_frame(insn)) { + WARN_FUNC("sibling call from callable instruction with changed frame pointer", + sec, 
insn->offset); + return 1; + } + + return 0; + + case INSN_BUG: + return 0; + + default: + break; + } + + insn = next_insn_same_sec(file, insn); + if (!insn) { + WARN("%s: unexpected end of section", sec->name); + return 1; + } + } + + return 0; +} + +static bool is_gcov_insn(struct instruction *insn) +{ + struct rela *rela; + struct section *sec; + struct symbol *sym; + unsigned long offset; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); + if (!rela) + return false; + + if (rela->sym->type != STT_SECTION) + return false; + + sec = rela->sym->sec; + offset = rela->addend + insn->offset + insn->len - rela->offset; + + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_OBJECT) + continue; + + if (offset >= sym->offset && offset < sym->offset + sym->len) + return (!memcmp(sym->name, "__gcov0.", 8)); + } + + return false; +} + +static bool is_kasan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, "__asan_handle_no_return")); +} + +static bool is_ubsan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, + "__ubsan_handle_builtin_unreachable")); +} + +static bool ignore_unreachable_insn(struct symbol *func, + struct instruction *insn) +{ + int i; + + if (insn->type == INSN_NOP) + return true; + + if (is_gcov_insn(insn)) + return true; + + /* + * Check if this (or a subsequent) instruction is related to + * CONFIG_UBSAN or CONFIG_KASAN. + * + * End the search at 5 instructions to avoid going into the weeds. 
+ */ + for (i = 0; i < 5; i++) { + + if (is_kasan_insn(insn) || is_ubsan_insn(insn)) + return true; + + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { + insn = insn->jump_dest; + continue; + } + + if (insn->offset + insn->len >= func->offset + func->len) + break; + insn = list_next_entry(insn, list); + } + + return false; +} + +static int validate_functions(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + struct instruction *insn; + int ret, warnings = 0; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + insn = find_insn(file, sec, func->offset); + if (!insn) { + WARN("%s(): can't find starting instruction", + func->name); + warnings++; + continue; + } + + ret = validate_branch(file, insn, 0); + warnings += ret; + } + } + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + func_for_each_insn(file, func, insn) { + if (insn->visited) + continue; + + if (!ignore_unreachable_insn(func, insn) && + !warnings) { + WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); + warnings++; + } + + insn->visited = true; + } + } + } + + return warnings; +} + +static int validate_uncallable_instructions(struct objtool_file *file) +{ + struct instruction *insn; + int warnings = 0; + + for_each_insn(file, insn) { + if (!insn->visited && insn->type == INSN_RETURN) { + WARN_FUNC("return instruction outside of a callable function", + insn->sec, insn->offset); + warnings++; + } + } + + return warnings; +} + +static void cleanup(struct objtool_file *file) +{ + struct instruction *insn, *tmpinsn; + struct alternative *alt, *tmpalt; + + list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { + list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { + list_del(&alt->list); + free(alt); + } + 
list_del(&insn->list); + hash_del(&insn->hash); + free(insn); + } + elf_close(file->elf); +} + +const char * const check_usage[] = { + "objtool check [<options>] file.o", + NULL, +}; + +int cmd_check(int argc, const char **argv) +{ + struct objtool_file file; + int ret, warnings = 0; + + const struct option options[] = { + OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), + OPT_END(), + }; + + argc = parse_options(argc, argv, options, check_usage, 0); + + if (argc != 1) + usage_with_options(check_usage, options); + + objname = argv[0]; + + file.elf = elf_open(objname); + if (!file.elf) { + fprintf(stderr, "error reading elf file %s\n", objname); + return 1; + } + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + + ret = decode_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + + ret = validate_functions(&file); + if (ret < 0) + goto out; + warnings += ret; + + ret = validate_uncallable_instructions(&file); + if (ret < 0) + goto out; + warnings += ret; + +out: + cleanup(&file); + + /* ignore warnings for now until we get all the code cleaned up */ + if (ret || warnings) + return 0; + return 0; +} diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h new file mode 100644 index 000000000000..34d2ba78a616 --- /dev/null +++ b/tools/objtool/builtin.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef _BUILTIN_H +#define _BUILTIN_H + +extern int cmd_check(int argc, const char **argv); + +#endif /* _BUILTIN_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c new file mode 100644 index 000000000000..e11f6b69cce6 --- /dev/null +++ b/tools/objtool/elf.c @@ -0,0 +1,412 @@ +/* + * elf.c - ELF access library + * + * Adapted from kpatch (https://github.com/dynup/kpatch): + * Copyright (C) 2013-2015 Josh Poimboeuf <jpoimboe@redhat.com> + * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "elf.h" +#include "warn.h" + +struct section *find_section_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) + if (!strcmp(sec->name, name)) + return sec; + + return NULL; +} + +static struct section *find_section_by_index(struct elf *elf, + unsigned int idx) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) + if (sec->idx == idx) + return sec; + + return NULL; +} + +static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + hash_for_each_possible(sec->symbol_hash, sym, hash, idx) + if (sym->idx == idx) + return sym; + + return NULL; +} + +struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + sym->offset == offset) + return sym; + + return NULL; +} + +struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, + unsigned int len) +{ + struct rela *rela; + unsigned long o; + + if (!sec->rela) + return NULL; + + for (o = offset; o < offset + len; o++) + hash_for_each_possible(sec->rela->rela_hash, rela, hash, o) + if (rela->offset == o) + return rela; + + return NULL; +} + +struct rela *find_rela_by_dest(struct section *sec, unsigned long offset) +{ + return find_rela_by_dest_range(sec, offset, 1); +} + +struct symbol *find_containing_func(struct section *sec, unsigned long offset) +{ + struct symbol *func; + + list_for_each_entry(func, &sec->symbol_list, list) + if (func->type == STT_FUNC && offset >= func->offset && + offset < func->offset + func->len) + return func; + + return NULL; +} + +static int read_sections(struct elf 
*elf) +{ + Elf_Scn *s = NULL; + struct section *sec; + size_t shstrndx, sections_nr; + int i; + + if (elf_getshdrnum(elf->elf, &sections_nr)) { + perror("elf_getshdrnum"); + return -1; + } + + if (elf_getshdrstrndx(elf->elf, &shstrndx)) { + perror("elf_getshdrstrndx"); + return -1; + } + + for (i = 0; i < sections_nr; i++) { + sec = malloc(sizeof(*sec)); + if (!sec) { + perror("malloc"); + return -1; + } + memset(sec, 0, sizeof(*sec)); + + INIT_LIST_HEAD(&sec->symbol_list); + INIT_LIST_HEAD(&sec->rela_list); + hash_init(sec->rela_hash); + hash_init(sec->symbol_hash); + + list_add_tail(&sec->list, &elf->sections); + + s = elf_getscn(elf->elf, i); + if (!s) { + perror("elf_getscn"); + return -1; + } + + sec->idx = elf_ndxscn(s); + + if (!gelf_getshdr(s, &sec->sh)) { + perror("gelf_getshdr"); + return -1; + } + + sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); + if (!sec->name) { + perror("elf_strptr"); + return -1; + } + + sec->elf_data = elf_getdata(s, NULL); + if (!sec->elf_data) { + perror("elf_getdata"); + return -1; + } + + if (sec->elf_data->d_off != 0 || + sec->elf_data->d_size != sec->sh.sh_size) { + WARN("unexpected data attributes for %s", sec->name); + return -1; + } + + sec->data = (unsigned long)sec->elf_data->d_buf; + sec->len = sec->elf_data->d_size; + } + + /* sanity check, one more call to elf_nextscn() should return NULL */ + if (elf_nextscn(elf->elf, s)) { + WARN("section entry mismatch"); + return -1; + } + + return 0; +} + +static int read_symbols(struct elf *elf) +{ + struct section *symtab; + struct symbol *sym; + struct list_head *entry, *tmp; + int symbols_nr, i; + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("missing symbol table"); + return -1; + } + + symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize; + + for (i = 0; i < symbols_nr; i++) { + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return -1; + } + memset(sym, 0, sizeof(*sym)); + + sym->idx = i; + + if
(!gelf_getsym(symtab->elf_data, i, &sym->sym)) { + perror("gelf_getsym"); + goto err; + } + + sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, + sym->sym.st_name); + if (!sym->name) { + perror("elf_strptr"); + goto err; + } + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + if (sym->sym.st_shndx > SHN_UNDEF && + sym->sym.st_shndx < SHN_LORESERVE) { + sym->sec = find_section_by_index(elf, + sym->sym.st_shndx); + if (!sym->sec) { + WARN("couldn't find section for symbol %s", + sym->name); + goto err; + } + if (sym->type == STT_SECTION) { + sym->name = sym->sec->name; + sym->sec->sym = sym; + } + } else + sym->sec = find_section_by_index(elf, 0); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + /* sorted insert into a per-section list */ + entry = &sym->sec->symbol_list; + list_for_each_prev(tmp, &sym->sec->symbol_list) { + struct symbol *s; + + s = list_entry(tmp, struct symbol, list); + + if (sym->offset > s->offset) { + entry = tmp; + break; + } + + if (sym->offset == s->offset && sym->len >= s->len) { + entry = tmp; + break; + } + } + list_add(&sym->list, entry); + hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); + } + + return 0; + +err: + free(sym); + return -1; +} + +static int read_relas(struct elf *elf) +{ + struct section *sec; + struct rela *rela; + int i; + unsigned int symndx; + + list_for_each_entry(sec, &elf->sections, list) { + if (sec->sh.sh_type != SHT_RELA) + continue; + + sec->base = find_section_by_name(elf, sec->name + 5); + if (!sec->base) { + WARN("can't find base section for rela section %s", + sec->name); + return -1; + } + + sec->base->rela = sec; + + for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) { + rela = malloc(sizeof(*rela)); + if (!rela) { + perror("malloc"); + return -1; + } + memset(rela, 0, sizeof(*rela)); + + if (!gelf_getrela(sec->elf_data, i, &rela->rela)) { + perror("gelf_getrela"); + return -1; + } + + rela->type = 
GELF_R_TYPE(rela->rela.r_info); + rela->addend = rela->rela.r_addend; + rela->offset = rela->rela.r_offset; + symndx = GELF_R_SYM(rela->rela.r_info); + rela->sym = find_symbol_by_index(elf, symndx); + if (!rela->sym) { + WARN("can't find rela entry symbol %d for %s", + symndx, sec->name); + return -1; + } + + list_add_tail(&rela->list, &sec->rela_list); + hash_add(sec->rela_hash, &rela->hash, rela->offset); + + } + } + + return 0; +} + +struct elf *elf_open(const char *name) +{ + struct elf *elf; + + elf_version(EV_CURRENT); + + elf = malloc(sizeof(*elf)); + if (!elf) { + perror("malloc"); + return NULL; + } + memset(elf, 0, sizeof(*elf)); + + INIT_LIST_HEAD(&elf->sections); + + elf->name = strdup(name); + if (!elf->name) { + perror("strdup"); + goto err; + } + + elf->fd = open(name, O_RDONLY); + if (elf->fd == -1) { + perror("open"); + goto err; + } + + elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); + if (!elf->elf) { + perror("elf_begin"); + goto err; + } + + if (!gelf_getehdr(elf->elf, &elf->ehdr)) { + perror("gelf_getehdr"); + goto err; + } + + if (read_sections(elf)) + goto err; + + if (read_symbols(elf)) + goto err; + + if (read_relas(elf)) + goto err; + + return elf; + +err: + elf_close(elf); + return NULL; +} + +void elf_close(struct elf *elf) +{ + struct section *sec, *tmpsec; + struct symbol *sym, *tmpsym; + struct rela *rela, *tmprela; + + list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) { + list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) { + list_del(&sym->list); + hash_del(&sym->hash); + free(sym); + } + list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) { + list_del(&rela->list); + hash_del(&rela->hash); + free(rela); + } + list_del(&sec->list); + free(sec); + } + if (elf->name) + free(elf->name); + if (elf->fd > 0) + close(elf->fd); + if (elf->elf) + elf_end(elf->elf); + free(elf); +} diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h new file mode 100644 index 000000000000..7f3e00a2f907 --- /dev/null 
+++ b/tools/objtool/elf.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _OBJTOOL_ELF_H +#define _OBJTOOL_ELF_H + +#include <stdio.h> +#include <gelf.h> +#include <linux/list.h> +#include <linux/hashtable.h> + +struct section { + struct list_head list; + GElf_Shdr sh; + struct list_head symbol_list; + DECLARE_HASHTABLE(symbol_hash, 8); + struct list_head rela_list; + DECLARE_HASHTABLE(rela_hash, 16); + struct section *base, *rela; + struct symbol *sym; + Elf_Data *elf_data; + char *name; + int idx; + unsigned long data; + unsigned int len; +}; + +struct symbol { + struct list_head list; + struct hlist_node hash; + GElf_Sym sym; + struct section *sec; + char *name; + unsigned int idx; + unsigned char bind, type; + unsigned long offset; + unsigned int len; +}; + +struct rela { + struct list_head list; + struct hlist_node hash; + GElf_Rela rela; + struct symbol *sym; + unsigned int type; + unsigned long offset; + int addend; +}; + +struct elf { + Elf *elf; + GElf_Ehdr ehdr; + int fd; + char *name; + struct list_head sections; + DECLARE_HASHTABLE(rela_hash, 16); +}; + + +struct elf *elf_open(const char *name); +struct section *find_section_by_name(struct elf *elf, const char *name); +struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct 
rela *find_rela_by_dest(struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, + unsigned int len); +struct symbol *find_containing_func(struct section *sec, unsigned long offset); +void elf_close(struct elf *elf); + + + +#endif /* _OBJTOOL_ELF_H */ diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c new file mode 100644 index 000000000000..46c326db4f46 --- /dev/null +++ b/tools/objtool/objtool.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * objtool: + * + * The 'check' subcmd analyzes every .o file and ensures the validity of its + * stack trace metadata. It enforces a set of rules on asm code and C inline + * assembly code so that stack traces can be reliable. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. 
+ */ + +#include <stdio.h> +#include <stdbool.h> +#include <string.h> +#include <stdlib.h> +#include <subcmd/exec-cmd.h> +#include <subcmd/pager.h> + +#include "builtin.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +struct cmd_struct { + const char *name; + int (*fn)(int, const char **); + const char *help; +}; + +static const char objtool_usage_string[] = + "objtool [OPTIONS] COMMAND [ARGS]"; + +static struct cmd_struct objtool_cmds[] = { + {"check", cmd_check, "Perform stack metadata validation on an object file" }, +}; + +bool help; + +static void cmd_usage(void) +{ + unsigned int i, longest = 0; + + printf("\n usage: %s\n\n", objtool_usage_string); + + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + if (longest < strlen(objtool_cmds[i].name)) + longest = strlen(objtool_cmds[i].name); + } + + puts(" Commands:"); + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + printf(" %-*s ", longest, objtool_cmds[i].name); + puts(objtool_cmds[i].help); + } + + printf("\n"); + + exit(1); +} + +static void handle_options(int *argc, const char ***argv) +{ + while (*argc > 0) { + const char *cmd = (*argv)[0]; + + if (cmd[0] != '-') + break; + + if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) { + help = true; + break; + } else { + fprintf(stderr, "Unknown option: %s\n", cmd); + fprintf(stderr, "\n Usage: %s\n", + objtool_usage_string); + exit(1); + } + + (*argv)++; + (*argc)--; + } +} + +static void handle_internal_command(int argc, const char **argv) +{ + const char *cmd = argv[0]; + unsigned int i, ret; + + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + struct cmd_struct *p = objtool_cmds+i; + + if (strcmp(p->name, cmd)) + continue; + + ret = p->fn(argc, argv); + + exit(ret); + } + + cmd_usage(); +} + +int main(int argc, const char **argv) +{ + static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED"; + + /* libsubcmd init */ + exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED); + pager_init(UNUSED); + + argv++; + argc--; + handle_options(&argc, &argv); + + if 
(!argc || help) + cmd_usage(); + + handle_internal_command(argc, argv); + + return 0; +} diff --git a/tools/objtool/special.c b/tools/objtool/special.c new file mode 100644 index 000000000000..bff8abb3a4aa --- /dev/null +++ b/tools/objtool/special.c @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This file reads all the special sections which have alternate instructions + * which can be patched in or redirected to at runtime. 
+ */ + +#include <stdlib.h> +#include <string.h> + +#include "special.h" +#include "warn.h" + +#define EX_ENTRY_SIZE 12 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 24 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 8 + +#define ALT_ENTRY_SIZE 13 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#define X86_FEATURE_POPCNT (4*32+23) + +struct special_entry { + const char *sec; + bool group, jump_or_nop; + unsigned char size, orig, new; + unsigned char orig_len, new_len; /* group only */ + unsigned char feature; /* ALTERNATIVE macro CPU feature */ +}; + +struct special_entry entries[] = { + { + .sec = ".altinstructions", + .group = true, + .size = ALT_ENTRY_SIZE, + .orig = ALT_ORIG_OFFSET, + .orig_len = ALT_ORIG_LEN_OFFSET, + .new = ALT_NEW_OFFSET, + .new_len = ALT_NEW_LEN_OFFSET, + .feature = ALT_FEATURE_OFFSET, + }, + { + .sec = "__jump_table", + .jump_or_nop = true, + .size = JUMP_ENTRY_SIZE, + .orig = JUMP_ORIG_OFFSET, + .new = JUMP_NEW_OFFSET, + }, + { + .sec = "__ex_table", + .size = EX_ENTRY_SIZE, + .orig = EX_ORIG_OFFSET, + .new = EX_NEW_OFFSET, + }, + {}, +}; + +static int get_alt_entry(struct elf *elf, struct special_entry *entry, + struct section *sec, int idx, + struct special_alt *alt) +{ + struct rela *orig_rela, *new_rela; + unsigned long offset; + + offset = idx * entry->size; + + alt->group = entry->group; + alt->jump_or_nop = entry->jump_or_nop; + + if (alt->group) { + alt->orig_len = *(unsigned char *)(sec->data + offset + + entry->orig_len); + alt->new_len = *(unsigned char *)(sec->data + offset + + entry->new_len); + } + + if (entry->feature) { + unsigned short feature; + + feature = *(unsigned short *)(sec->data + offset + + entry->feature); + + /* + * It has been requested that we don't validate the !POPCNT + * feature path which is a "very very small percentage of + * machines". 
+ */ + if (feature == X86_FEATURE_POPCNT) + alt->skip_orig = true; + } + + orig_rela = find_rela_by_dest(sec, offset + entry->orig); + if (!orig_rela) { + WARN_FUNC("can't find orig rela", sec, offset + entry->orig); + return -1; + } + if (orig_rela->sym->type != STT_SECTION) { + WARN_FUNC("don't know how to handle non-section rela symbol %s", + sec, offset + entry->orig, orig_rela->sym->name); + return -1; + } + + alt->orig_sec = orig_rela->sym->sec; + alt->orig_off = orig_rela->addend; + + if (!entry->group || alt->new_len) { + new_rela = find_rela_by_dest(sec, offset + entry->new); + if (!new_rela) { + WARN_FUNC("can't find new rela", + sec, offset + entry->new); + return -1; + } + + alt->new_sec = new_rela->sym->sec; + alt->new_off = (unsigned int)new_rela->addend; + + /* _ASM_EXTABLE_EX hack */ + if (alt->new_off >= 0x7ffffff0) + alt->new_off -= 0x7ffffff0; + } + + return 0; +} + +/* + * Read all the special sections and create a list of special_alt structs which + * describe all the alternate instructions which can be patched in or + * redirected to at runtime. 
+ */ +int special_get_alts(struct elf *elf, struct list_head *alts) +{ + struct special_entry *entry; + struct section *sec; + unsigned int nr_entries; + struct special_alt *alt; + int idx, ret; + + INIT_LIST_HEAD(alts); + + for (entry = entries; entry->sec; entry++) { + sec = find_section_by_name(elf, entry->sec); + if (!sec) + continue; + + if (sec->len % entry->size != 0) { + WARN("%s size not a multiple of %d", + sec->name, entry->size); + return -1; + } + + nr_entries = sec->len / entry->size; + + for (idx = 0; idx < nr_entries; idx++) { + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + memset(alt, 0, sizeof(*alt)); + + ret = get_alt_entry(elf, entry, sec, idx, alt); + if (ret) + return ret; + + list_add_tail(&alt->list, alts); + } + } + + return 0; +} diff --git a/tools/objtool/special.h b/tools/objtool/special.h new file mode 100644 index 000000000000..fad1d092f679 --- /dev/null +++ b/tools/objtool/special.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _SPECIAL_H +#define _SPECIAL_H + +#include <stdbool.h> +#include "elf.h" + +struct special_alt { + struct list_head list; + + bool group; + bool skip_orig; + bool jump_or_nop; + + struct section *orig_sec; + unsigned long orig_off; + + struct section *new_sec; + unsigned long new_off; + + unsigned int orig_len, new_len; /* group only */ +}; + +int special_get_alts(struct elf *elf, struct list_head *alts); + +#endif /* _SPECIAL_H */ diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h new file mode 100644 index 000000000000..ac7e07523e84 --- /dev/null +++ b/tools/objtool/warn.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _WARN_H +#define _WARN_H + +extern const char *objname; + +static inline char *offstr(struct section *sec, unsigned long offset) +{ + struct symbol *func; + char *name, *str; + unsigned long name_off; + + func = find_containing_func(sec, offset); + if (func) { + name = func->name; + name_off = offset - func->offset; + } else { + name = sec->name; + name_off = offset; + } + + str = malloc(strlen(name) + 20); + + if (func) + sprintf(str, "%s()+0x%lx", name, name_off); + else + sprintf(str, "%s+0x%lx", name, name_off); + + return str; +} + +#define WARN(format, ...) 
\ + fprintf(stderr, \ + "%s: warning: objtool: " format "\n", \ + objname, ##__VA_ARGS__) + +#define WARN_FUNC(format, sec, offset, ...) \ +({ \ + char *_str = offstr(sec, offset); \ + WARN("%s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ +}) + +#endif /* _WARN_H */ diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 3ba1c0b09908..098cfb9ca8f0 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../config/utilities.mak +include ../../scripts/utilities.mak MAN1_TXT= \ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index b9ca1e304158..15949e2a7805 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] -'perf config' -l | --list +'perf config' [<file-option>] -l | --list DESCRIPTION ----------- @@ -21,6 +21,14 @@ OPTIONS --list:: Show current config variables, name and value, for all sections. +--user:: + For writing and reading options: write to user + '$HOME/.perfconfig' file or read it. + +--system:: + For writing and reading options: write to system-wide + '$(sysconfdir)/perfconfig' or read it. + CONFIGURATION FILE ------------------ @@ -30,6 +38,10 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration. The file '$(sysconfdir)/perfconfig' can be used to store a system-wide default configuration. +When reading or writing, the values are read from the system and user +configuration files by default, and options '--system' and '--user' +can be used to tell the command to read from or write to only that location. 
+ Syntax ~~~~~~ @@ -62,7 +74,7 @@ Given a $HOME/.perfconfig like this: medium = green, default normal = lightgray, default selected = white, lightgray - code = blue, default + jump_arrows = blue, default addr = magenta, default root = white, blue @@ -98,6 +110,347 @@ Given a $HOME/.perfconfig like this: order = caller sort-key = function +Variables +~~~~~~~~~ + +colors.*:: + The variables for customizing the colors used in the output for the + 'report', 'top' and 'annotate' in the TUI. They should specify the + foreground and background colors, separated by a comma, for example: + + medium = green, lightgray + + If you want to use the color configured for your terminal, just leave it + as 'default', for example: + + medium = default, lightgray + + Available colors: + red, yellow, green, cyan, gray, black, blue, + white, default, magenta, lightgray + + colors.top:: + 'top' means an overhead percentage which is more than 5%. + And values of this variable specify percentage colors. + Basic key values are foreground-color 'red' and + background-color 'default'. + colors.medium:: + 'medium' means an overhead percentage which is more than 0.5%. + Default values are 'green' and 'default'. + colors.normal:: + 'normal' means the rest of overhead percentages + except 'top', 'medium', 'selected'. + Default values are 'lightgray' and 'default'. + colors.selected:: + This selects the colors for the current entry in a list of entries + from sub-commands (top, report, annotate). + Default values are 'black' and 'lightgray'. + colors.jump_arrows:: + Colors for jump arrows on assembly code listings + such as 'jns', 'jmp', 'jne', etc. + Default values are 'blue', 'default'. + colors.addr:: + This selects colors for addresses from 'annotate'. + Default values are 'magenta', 'default'. + colors.root:: + Colors for headers in the output of sub-commands (top, report). + Default values are 'white', 'blue'. 
+ +tui.*, gtk.*:: + Subcommands that can be configured here are 'top', 'report' and 'annotate'. + These values are booleans, for example: + + [tui] + top = true + + will make the TUI be the default for the 'top' subcommand. Those will be + available if the required libs were detected at tool build time. + +buildid.*:: + buildid.dir:: + Each executable and shared library in modern distributions comes with a + content based identifier that, if available, will be inserted in a + 'perf.data' file header to, at analysis time find what is needed to do + symbol resolution, code annotation, etc. + + The recording tools also stores a hard link or copy in a per-user + directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms + and /proc/kcore files to be used at analysis time. + + The buildid.dir variable can be used to either change this directory + cache location, or to disable it altogether. If you want to disable it, + set buildid.dir to /dev/null. The default is $HOME/.debug + +annotate.*:: + These options work only for TUI. + These are in control of addresses, jump function, source code + in lines of assembly code from a specific program. + + annotate.hide_src_code:: + If a program which is analyzed has source code, + this option lets 'annotate' print a list of assembly code with the source code. + For example, let's see a part of a program. There're four lines. + If this option is 'true', they can be printed + without source code from a program as below. + + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ mov (%rdi),%rdx + + But if this option is 'false', source code of the part + can be also printed as below. Default is 'false'. + + │ struct rb_node *rb_next(const struct rb_node *node) + │ { + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ struct rb_node *parent; + │ + │ if (RB_EMPTY_NODE(node)) + │ mov (%rdi),%rdx + │ return n; + + annotate.use_offset:: + Basing on a first address of a loaded function, offset can be used. 
+ Instead of using original addresses of assembly code, + addresses subtracted from a base address can be printed. + Let's illustrate an example. + If a base address is 0XFFFFFFFF81624d50 as below, + + ffffffff81624d50 <load0> + + an address on assembly code has a specific absolute address as below + + ffffffff816250b8:│ mov 0x8(%r14),%rdi + + but if use_offset is 'true', an address subtracted from a base address is printed. + Default is true. This option is only applied to TUI. + + 368:│ mov 0x8(%r14),%rdi + + annotate.jump_arrows:: + There can be jump instruction among assembly code. + Depending on a boolean value of jump_arrows, + arrows can be printed or not which represent + where do the instruction jump into as below. + + │ ┌──jmp 1333 + │ │ xchg %ax,%ax + │1330:│ mov %r15,%r10 + │1333:└─→cmp %r15,%r14 + + If jump_arrow is 'false', the arrows isn't printed as below. + Default is 'false'. + + │ ↓ jmp 1333 + │ xchg %ax,%ax + │1330: mov %r15,%r10 + │1333: cmp %r15,%r14 + + annotate.show_linenr:: + When showing source code if this option is 'true', + line numbers are printed as below. + + │1628 if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │1628 data->id = *array; + │1629 array++; + │1630 } + + However if this option is 'false', they aren't printed as below. + Default is 'false'. + + │ if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │ data->id = *array; + │ array++; + │ } + + annotate.show_nr_jumps:: + Let's see a part of assembly code. + + │1382: movb $0x1,-0x270(%rbp) + + If use this, the number of branches jumping to that address can be printed as below. + Default is 'false'. + + │1 1382: movb $0x1,-0x270(%rbp) + + annotate.show_total_period:: + To compare two records on an instruction base, with this option + provided, display total number of samples that belong to a line + in assembly code. If this option is 'true', total periods are printed + instead of percent values as below. 
+ + 302 │ mov %eax,%eax + + But if this option is 'false', percent values for overhead are printed as below. + Default is 'false'. + + 99.93 │ mov %eax,%eax + +hist.*:: + hist.percentage:: + This option controls the way to calculate overhead of filtered entries - + that means the value of this option is effective only if there's a + filter (by comm, dso or symbol name). Suppose the following example: + + Overhead Symbols + ........ ....... + 33.33% foo + 33.33% bar + 33.33% baz + + This is an original overhead and we'll filter out the first 'foo' + entry. The value of 'relative' would increase the overhead of 'bar' + and 'baz' to 50.00% for each, while 'absolute' would show their + current overhead (33.33%). + +ui.*:: + ui.show-headers:: + This option controls display of column headers (like 'Overhead' and 'Symbol') + in 'report' and 'top'. If this option is false, they are hidden. + This option is only applied to TUI. + +call-graph.*:: + When sub-commands 'top' and 'report' work with -g/--children + there're options in control of call-graph. + + call-graph.record-mode:: + The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. + The value of 'dwarf' is effective only if perf detects the needed library + (libunwind or a recent version of libdw). + 'lbr' only works for cpus that support it. + + call-graph.dump-size:: + The size of stack to dump in order to do post-unwinding. Default is 8192 (bytes). + When using dwarf into record-mode, the default size will be used if omitted. + + call-graph.print-type:: + The print-types can be graph (graph absolute), fractal (graph relative), + flat and folded. This option controls a way to show overhead for each callchain + entry. Suppose the following example. + + Overhead Symbols + ........ ....... + 40.00% foo + | + ---foo + | + |--50.00%--bar + | main + | + --50.00%--baz + main + + This output is a 'fractal' format. 
The 'foo' came from 'bar' and 'baz' exactly + half and half so 'fractal' shows 50.00% for each + (meaning that it assumes 100% total overhead of 'foo'). + + The 'graph' uses absolute overhead value of 'foo' as total so each of + 'bar' and 'baz' callchain will have 20.00% of overhead. + If 'flat' is used, call chains are shown linearly in a single column. + 'folded' means call chains are displayed in a line, separated by semicolons. + + call-graph.order:: + This option controls print order of callchains. The default is + 'callee' which means callee is printed at top and then followed by its + caller and so on. The 'caller' prints it in reverse order. + + If this option is not set and report.children or top.children is + set to true (or the equivalent command line option is given), + the default value of this option is changed to 'caller' for the + execution of 'perf report' or 'perf top'. Other commands will + still default to 'callee'. + + call-graph.sort-key:: + The callchains are merged if they contain the same information. + The sort-key option determines a way to compare the callchains. + A value of 'sort-key' can be 'function' or 'address'. + The default is 'function'. + + call-graph.threshold:: + When there're many callchains it'd print tons of lines. So perf omits + small callchains under a certain overhead (threshold) and this option + controls the threshold. Default is 0.5 (%). The overhead is calculated + by a value that depends on call-graph.print-type. + + call-graph.print-limit:: + This is a maximum number of lines of callchain printed for a single + histogram entry. Default is 0 which means no limitation. + +report.*:: + report.percent-limit:: + This one is mostly the same as call-graph.threshold but works for + histogram entries. Entries having an overhead lower than this + percentage will not be printed. Default is '0'. If percent-limit + is '10', only entries which have more than 10% of overhead will be + printed. 
+ + report.queue-size:: + This option sets up the maximum allocation size of the internal + event queue for ordering events. Default is 0, meaning no limit. + + report.children:: + 'Children' means functions called from another function. + If this option is true, 'perf report' cumulates callchains of children + and show (accumulated) total overhead as well as 'Self' overhead. + Please refer to the 'perf report' manual. The default is 'true'. + + report.group:: + This option is to show event group information together. + Example output with this turned on, notice that there is one column + per event in the group, ref-cycles and cycles: + + # group: {ref-cycles,cycles} + # ======== + # + # Samples: 7K of event 'anon group { ref-cycles, cycles }' + # Event count (approx.): 6876107743 + # + # Overhead Command Shared Object Symbol + # ................ ....... ................. ................... + # + 99.84% 99.76% noploop noploop [.] main + 0.07% 0.00% noploop ld-2.15.so [.] strcmp + 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del + +top.*:: + top.children:: + Same as 'report.children'. So if it is enabled, the output of 'top' + command will have 'Children' overhead column as well as 'Self' overhead + column by default. + The default is 'true'. + +man.*:: + man.viewer:: + This option can assign a tool to view manual pages when 'help' + subcommand was invoked. Supported tools are 'man', 'woman' + (with emacs client) and 'konqueror'. Default is 'man'. + + New man viewer tool can be also added using 'man.<tool>.cmd' + or use different path using 'man.<tool>.path' config option. + +pager.*:: + pager.<subcommand>:: + When the subcommand is run on stdio, determine whether it uses + pager or not based on this value. Default is 'unspecified'. + +kmem.*:: + kmem.default:: + This option decides which allocator is to be analyzed if neither + '--slab' nor '--page' option is used. Default is 'slab'. 
+ +record.*:: + record.build-id:: + This option can be 'cache', 'no-cache' or 'skip'. + 'cache' is to post-process data and save/update the binaries into + the build-id cache (in ~/.debug). This is the default. + But if this option is 'no-cache', it will not update the build-id cache. + 'skip' skips post-processing and does not update the cache. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0b1cedeef895..87b2588d1cbd 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -53,6 +53,13 @@ include::itrace.txt[] --strip:: Use with --itrace to strip out non-synthesized events. +-j:: +--jit:: + Process jitdump files by injecting the mmap records corresponding to jitted + functions. This option also generates the ELF images for each jitted function + found in the jitdump files captured in the input perf.data file. Use this option + if you are monitoring an environment using JIT runtimes, such as Java, DART or V8. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 79483f40e991..ec723d0a5bb3 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -40,10 +40,12 @@ address should be. The 'p' modifier can be specified multiple times: 0 - SAMPLE_IP can have arbitrary skid 1 - SAMPLE_IP must have constant skid 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid + 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid + sample shadowing effects. For Intel systems precise event sampling is implemented with PEBS -which supports up to precise-level 2. +which supports up to precise-level 2, and precise level 3 for +some special cases. On AMD systems it is implemented using IBS (up to precise-level 2). 
The precise modifier works with event types 0x76 (cpu-cycles, CPU diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index fbceb631387c..19aa17532a16 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -341,6 +341,12 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--all-kernel:: +Configure all used events to run in kernel space. + +--all-user:: +Configure all used events to run in user space. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8a301f6afb37..12113992ac9d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -117,6 +117,22 @@ OPTIONS And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. + If the --mem-mode option is used, the following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. + + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of the sample + - locked: whether the bus was locked at the time of the sample + - tlb: type of tlb access for the data at the time of the sample + - mem: type of memory access for the data at the time of the sample + - snoop: type of snoop (if any) for the data at the time of the sample + - dcacheline: the cacheline the data address is on at the time of the sample + + And the default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. 
+ If the data file has tracepoint event(s), following (dynamic) sort keys are also available: trace, trace_fields, [<event>.]<field>[/raw] @@ -151,22 +167,6 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. - If --mem-mode option is used, following sort keys are also available - (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. - - - symbol_daddr: name of data symbol being executed on at the time of sample - - dso_daddr: name of library or module containing the data being executed - on at the time of sample - - locked: whether the bus was locked at the time of sample - - tlb: type of tlb access for the data at the time of sample - - mem: type of memory access for the data at the time of sample - - snoop: type of snoop (if any) for the data at the time of sample - - dcacheline: the cacheline the data address is on at the time of sample - - And default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. - -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this @@ -351,7 +351,10 @@ OPTIONS --percent-limit:: Do not show entries which have an overhead under that percent. - (Default: 0). + (Default: 0). Note that this option also sets the percent limit (threshold) + of callchains. However the default value of callchain threshold is + different than the default value of hist entries. Please see the + --call-graph option for details. --percentage:: Determine how to display the overhead percentage of filtered entries. @@ -398,6 +401,9 @@ include::itrace.txt[] --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. +--hierarchy:: + Enable hierarchical output. 
+ include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 52ef7a9d50aa..04f23b404bbc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -69,6 +69,14 @@ report:: --scale:: scale/normalize counter values +-d:: +--detailed:: + print more detailed statistics, can be specified up to 3 times + + -d: detailed events, L1 and LLC data cache + -d -d: more detailed events, dTLB and iTLB events + -d -d -d: very detailed events, adding prefetch events + -r:: --repeat=<n>:: repeat command and print average + stddev (max: 100). 0 means forever. @@ -139,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +--metric-only:: +Only print computed metrics. Print them in a single line. +Don't show any raw values. Not supported with --per-thread. + --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This is a useful mode to detect imbalance between sockets. To enable this mode, @@ -211,6 +223,29 @@ $ perf stat -- make -j Wall-clock time elapsed: 719.554352 msecs +CSV FORMAT +---------- + +With -x, perf stat is able to output a not-quite-CSV format output. +Commas in the output are not put into "". 
To make it easy to parse +it is recommended to use a different character like -x \; + +The fields are in this order: + + - optional usec time stamp in fractions of second (with -I xxx) + - optional CPU, core, or socket identifier + - optional number of logical CPUs aggregated + - counter value + - unit of the counter value or empty + - event name + - run time of counter + - percentage of measurement time the counter was running + - optional variance if multiple values are collected with -r + - optional metric value + - optional unit of metric + +Additional metrics may be printed with all earlier fields being empty. + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b0e60e17db38..19f046f027cd 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -233,6 +233,9 @@ Default is to monitor all CPUS. --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. +--hierarchy:: + Enable hierarchy output. 
+ INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example index 767ea2436e1c..1d8d5bc4cd2d 100644 --- a/tools/perf/Documentation/perfconfig.example +++ b/tools/perf/Documentation/perfconfig.example @@ -5,7 +5,7 @@ medium = green, lightgray normal = black, lightgray selected = lightgray, magenta - code = blue, lightgray + jump_arrows = blue, lightgray addr = magenta, lightgray [tui] diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index e0ce9573b79b..5950b5a24efd 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -27,3 +27,4 @@ Skip collecing build-id when recording: perf record -B To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate <symbol> If you prefer Intel style assembly, try: perf annotate -M intel +For hierarchical output, try: perf report --hierarchy diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 2e1fa2357528..8c8c6b9ce915 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h arch/*/include/uapi/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S +arch/*/include/asm/*features.h include/linux/poison.h include/linux/hw_breakpoint.h include/uapi/linux/perf_event.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcd9a70c7193..32a64e619028 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -68,6 +68,20 @@ all tags TAGS: $(print_msg) $(make) +ifdef MAKECMDGOALS +has_clean := 0 +ifneq ($(filter clean,$(MAKECMDGOALS)),) + has_clean := 1 +endif # clean + +ifeq ($(has_clean),1) + rest := $(filter-out clean,$(MAKECMDGOALS)) + ifneq ($(rest),) +$(rest): clean + endif # rest +endif # has_clean +endif # MAKECMDGOALS + # # The clean target is not really parallel, don't print the jobs info: # @@ -75,10 +89,17 @@ clean: $(make) # -# The 
build-test target is not really parallel, don't print the jobs info: +# The build-test target is not really parallel, don't print the jobs info, +# it also uses only the tests/make targets that don't pollute the source +# repository, i.e. that uses O= or builds the tarpkg outside the source +# repo directories. +# +# For a full test, use: +# +# make -C tools/perf -f tests/make # build-test: - @$(MAKE) SHUF=1 -f tests/make --no-print-directory + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out # # All other targets get passed through: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5d34815c7ccb..000ea210389d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -3,7 +3,7 @@ include ../scripts/Makefile.include # The default target of this Makefile is... all: -include config/utilities.mak +include ../scripts/utilities.mak # Define V to have a more verbose compile. # @@ -58,6 +58,9 @@ include config/utilities.mak # # Define NO_LIBBIONIC if you do not want bionic support # +# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support +# used for generating build-ids for ELFs generated by jitdump. +# # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. # @@ -136,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,LD,$(CROSS_COMPILE)ld) +LD += $(EXTRA_LDFLAGS) + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f @@ -165,7 +170,16 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) endif endif +# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. +# Without this setting the output feature dump file misses some features, for +# example, liberty. Select all checkers so we won't get an incomplete feature +# dump file. 
ifeq ($(config),1) +ifdef MAKECMDGOALS +ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump) +FEATURE_TESTS := all +endif +endif include config/Makefile endif @@ -618,7 +632,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ - $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c + $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 7fbca175099e..56e05f126ad8 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,3 +1,6 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7b8b0d1a1b62..c8fe2074d217 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,6 @@ libperf-y += header.o libperf-y += sym-handling.o +libperf-y += kvm-stat.o 
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h new file mode 100644 index 000000000000..0dd6b7f2d44f --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -0,0 +1,123 @@ +#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H +#define ARCH_PERF_BOOK3S_HV_HCALLS_H + +/* + * PowerPC HCALL codes : hcall code to name mapping + */ +#define kvm_trace_symbol_hcall \ + {0x4, "H_REMOVE"}, \ + {0x8, "H_ENTER"}, \ + {0xc, "H_READ"}, \ + {0x10, "H_CLEAR_MOD"}, \ + {0x14, "H_CLEAR_REF"}, \ + {0x18, "H_PROTECT"}, \ + {0x1c, "H_GET_TCE"}, \ + {0x20, "H_PUT_TCE"}, \ + {0x24, "H_SET_SPRG0"}, \ + {0x28, "H_SET_DABR"}, \ + {0x2c, "H_PAGE_INIT"}, \ + {0x30, "H_SET_ASR"}, \ + {0x34, "H_ASR_ON"}, \ + {0x38, "H_ASR_OFF"}, \ + {0x3c, "H_LOGICAL_CI_LOAD"}, \ + {0x40, "H_LOGICAL_CI_STORE"}, \ + {0x44, "H_LOGICAL_CACHE_LOAD"}, \ + {0x48, "H_LOGICAL_CACHE_STORE"}, \ + {0x4c, "H_LOGICAL_ICBI"}, \ + {0x50, "H_LOGICAL_DCBF"}, \ + {0x54, "H_GET_TERM_CHAR"}, \ + {0x58, "H_PUT_TERM_CHAR"}, \ + {0x5c, "H_REAL_TO_LOGICAL"}, \ + {0x60, "H_HYPERVISOR_DATA"}, \ + {0x64, "H_EOI"}, \ + {0x68, "H_CPPR"}, \ + {0x6c, "H_IPI"}, \ + {0x70, "H_IPOLL"}, \ + {0x74, "H_XIRR"}, \ + {0x78, "H_MIGRATE_DMA"}, \ + {0x7c, "H_PERFMON"}, \ + {0xdc, "H_REGISTER_VPA"}, \ + {0xe0, "H_CEDE"}, \ + {0xe4, "H_CONFER"}, \ + {0xe8, "H_PROD"}, \ + {0xec, "H_GET_PPP"}, \ + {0xf0, "H_SET_PPP"}, \ + {0xf4, "H_PURR"}, \ + {0xf8, "H_PIC"}, \ + {0xfc, "H_REG_CRQ"}, \ + {0x100, "H_FREE_CRQ"}, \ + {0x104, "H_VIO_SIGNAL"}, \ + {0x108, "H_SEND_CRQ"}, \ + {0x110, "H_COPY_RDMA"}, \ + {0x114, "H_REGISTER_LOGICAL_LAN"}, \ + {0x118, "H_FREE_LOGICAL_LAN"}, \ + {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {0x120, "H_SEND_LOGICAL_LAN"}, \ + {0x124, "H_BULK_REMOVE"}, \ + {0x130, "H_MULTICAST_CTRL"}, \ + {0x134, "H_SET_XDABR"}, \ + {0x138, "H_STUFF_TCE"}, \ + {0x13c, "H_PUT_TCE_INDIRECT"}, \ + {0x14c, 
"H_CHANGE_LOGICAL_LAN_MAC"}, \ + {0x150, "H_VTERM_PARTNER_INFO"}, \ + {0x154, "H_REGISTER_VTERM"}, \ + {0x158, "H_FREE_VTERM"}, \ + {0x15c, "H_RESET_EVENTS"}, \ + {0x160, "H_ALLOC_RESOURCE"}, \ + {0x164, "H_FREE_RESOURCE"}, \ + {0x168, "H_MODIFY_QP"}, \ + {0x16c, "H_QUERY_QP"}, \ + {0x170, "H_REREGISTER_PMR"}, \ + {0x174, "H_REGISTER_SMR"}, \ + {0x178, "H_QUERY_MR"}, \ + {0x17c, "H_QUERY_MW"}, \ + {0x180, "H_QUERY_HCA"}, \ + {0x184, "H_QUERY_PORT"}, \ + {0x188, "H_MODIFY_PORT"}, \ + {0x18c, "H_DEFINE_AQP1"}, \ + {0x190, "H_GET_TRACE_BUFFER"}, \ + {0x194, "H_DEFINE_AQP0"}, \ + {0x198, "H_RESIZE_MR"}, \ + {0x19c, "H_ATTACH_MCQP"}, \ + {0x1a0, "H_DETACH_MCQP"}, \ + {0x1a4, "H_CREATE_RPT"}, \ + {0x1a8, "H_REMOVE_RPT"}, \ + {0x1ac, "H_REGISTER_RPAGES"}, \ + {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b4, "H_ERROR_DATA"}, \ + {0x1b8, "H_GET_HCA_INFO"}, \ + {0x1bc, "H_GET_PERF_COUNT"}, \ + {0x1c0, "H_MANAGE_TRACE"}, \ + {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {0x1d8, "H_POLL_PENDING"}, \ + {0x1e4, "H_QUERY_INT_STATE"}, \ + {0x244, "H_ILLAN_ATTRIBUTES"}, \ + {0x250, "H_MODIFY_HEA_QP"}, \ + {0x254, "H_QUERY_HEA_QP"}, \ + {0x258, "H_QUERY_HEA"}, \ + {0x25c, "H_QUERY_HEA_PORT"}, \ + {0x260, "H_MODIFY_HEA_PORT"}, \ + {0x264, "H_REG_BCMC"}, \ + {0x268, "H_DEREG_BCMC"}, \ + {0x26c, "H_REGISTER_HEA_RPAGES"}, \ + {0x270, "H_DISABLE_AND_GET_HEA"}, \ + {0x274, "H_GET_HEA_INFO"}, \ + {0x278, "H_ALLOC_HEA_RESOURCE"}, \ + {0x284, "H_ADD_CONN"}, \ + {0x288, "H_DEL_CONN"}, \ + {0x298, "H_JOIN"}, \ + {0x2a4, "H_VASI_STATE"}, \ + {0x2b0, "H_ENABLE_CRQ"}, \ + {0x2b8, "H_GET_EM_PARMS"}, \ + {0x2d0, "H_SET_MPP"}, \ + {0x2d4, "H_GET_MPP"}, \ + {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {0x2f4, "H_BEST_ENERGY"}, \ + {0x2fc, "H_XIRR_X"}, \ + {0x300, "H_RANDOM"}, \ + {0x304, "H_COP"}, \ + {0x314, "H_GET_MPP_X"}, \ + {0x31c, "H_SET_MODE"}, \ + {0xf000, "H_RTAS"} \ + +#endif diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h new file mode 100644 
index 000000000000..e68ba2da8970 --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -0,0 +1,33 @@ +#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H +#define ARCH_PERF_BOOK3S_HV_EXITS_H + +/* + * PowerPC Interrupt vectors : exit code to name mapping + */ + +#define kvm_trace_symbol_exit \ + {0x0, "RETURN_TO_HOST"}, \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a75db09..f8ccee132867 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -3,9 +3,9 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> - -#include "../../util/header.h" -#include "../../util/util.h" +#include <linux/stringify.h> +#include "header.h" +#include "util.h" #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c new file mode 100644 index 000000000000..74eee30398f8 --- /dev/null +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -0,0 +1,170 @@ +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" + +#include "book3s_hv_exits.h" +#include "book3s_hcalls.h" + +#define NR_TPS 4 + +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 40; +const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter"; +const char *kvm_exit_trace = 
"kvm_hv:kvm_guest_exit"; + +define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit); +define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall); + +/* Tracepoints specific to ppc_book3s_hv */ +const char *ppc_book3s_hv_kvm_tp[] = { + "kvm_hv:kvm_guest_enter", + "kvm_hv:kvm_guest_exit", + "kvm_hv:kvm_hcall_enter", + "kvm_hv:kvm_hcall_exit", + NULL, +}; + +/* 1 extra placeholder for NULL */ +const char *kvm_events_tp[NR_TPS + 1]; +const char *kvm_exit_reason; + +static void hcall_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = perf_evsel__intval(evsel, sample, "req"); +} + +static const char *get_hcall_exit_reason(u64 exit_code) +{ + struct exit_reasons_table *tbl = hcall_reasons; + + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_debug("Unknown hcall code: %lld\n", + (unsigned long long)exit_code); + return "UNKNOWN"; +} + +static bool hcall_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return (!strcmp(evsel->name, kvm_events_tp[3])); +} + +static bool hcall_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_events_tp[2])) { + hcall_event_get_key(evsel, sample, key); + return true; + } + + return false; +} +static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + const char *hcall_reason = get_hcall_exit_reason(key->key); + + scnprintf(decode, decode_str_len, "%s", hcall_reason); +} + +static struct kvm_events_ops hcall_events = { + .is_begin_event = hcall_event_begin, + .is_end_event = hcall_event_end, + .decode_key = hcall_event_decode_key, + .name = "HCALL-EVENT", +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + 
.decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events }, + { .name = "hcall", .ops = &hcall_events }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + + +static int is_tracepoint_available(const char *str, struct perf_evlist *evlist) +{ + struct parse_events_error err; + int ret; + + err.str = NULL; + ret = parse_events(evlist, str, &err); + if (err.str) + pr_err("%s : %s\n", str, err.str); + return ret; +} + +static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm, + struct perf_evlist *evlist) +{ + const char **events_ptr; + int i, nr_tp = 0, err = -1; + + /* Check for book3s_hv tracepoints */ + for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) { + err = is_tracepoint_available(*events_ptr, evlist); + if (err) + return -1; + nr_tp++; + } + + for (i = 0; i < nr_tp; i++) + kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i]; + + kvm_events_tp[i] = NULL; + kvm_exit_reason = "trap"; + kvm->exit_reasons = hv_exit_reasons; + kvm->exit_reasons_isa = "HV"; + + return 0; +} + +/* Wrapper to setup kvm tracepoints */ +static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist == NULL) + return -ENOMEM; + + /* Right now, only supported on book3s_hv */ + return ppc__setup_book3s_hv(kvm, evlist); +} + +int setup_kvm_events_tp(struct perf_kvm_stat *kvm) +{ + return ppc__setup_kvm_tp(kvm); +} + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + int ret; + + ret = ppc__setup_kvm_tp(kvm); + if (ret) { + kvm->exit_reasons = NULL; + kvm->exit_reasons_isa = NULL; + } + + return ret; +} diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index a5dbc07ec9dc..ed57df2e6d68 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -10,7 +10,7 @@ */ #include 
"../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/sie.h> define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes); @@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes); define_exit_reasons_table(sie_diagnose_codes, diagnose_codes); define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes); +const char *vcpu_id_str = "id"; +const int decode_str_len = 40; +const char *kvm_exit_reason = "icptcode"; +const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter"; +const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit"; + static void event_icpt_insn_get_key(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key) @@ -73,7 +79,7 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_s390_sie_enter", "kvm:kvm_s390_sie_exit", "kvm:kvm_s390_intercept_instruction", diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 09ba923debe8..269af2143735 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 7bb0d13c235f..72193f19d6d7 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -59,7 +59,7 @@ static u64 mmap_read_self(void *addr) u64 quot, rem; quot = (cyc >> time_shift); - rem = cyc & ((1 << time_shift) - 1); + rem = cyc & (((u64)1 << time_shift) - 1); delta = time_offset + quot * time_mult + ((rem * time_mult) >> time_shift); @@ -103,6 +103,7 @@ static int __test__rdpmc(void) sigfillset(&sa.sa_mask); sa.sa_sigaction = segfault_handler; + sa.sa_flags = 0; sigaction(SIGSEGV, &sa, NULL); fd = sys_perf_event_open(&attr, 0, -1, -1, diff 
--git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 8d8150f1cf9b..d66f9ad4df2e 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -60,7 +60,9 @@ struct branch { u64 misc; }; -static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_BTS_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f05daacc9e78..a3395179c9ee 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats, *config = attr.config; out_free: - parse_events__free_terms(terms); + parse_events_terms__delete(terms); return err; } @@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) return attr; } -static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_PT_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e668fad7..b63d4be655a2 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,5 +1,7 @@ #include "../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/svm.h> +#include <asm/vmx.h> +#include <asm/kvm.h> define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); @@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = 
"kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + /* * For the mmio events, we treat: * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry @@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#lx:%s", + scnprintf(decode, decode_str_len, "%#lx:%s", (unsigned long)key->key, key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); } @@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#llx:%s", + scnprintf(decode, decode_str_len, "%#llx:%s", (unsigned long long)key->key, key->info ? "POUT" : "PIN"); } @@ -121,7 +129,7 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_entry", "kvm:kvm_exit", "kvm:kvm_mmio", diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a50df86f2b9b..579a592990dd 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -25,19 +25,17 @@ # endif #endif -extern int bench_numa(int argc, const char **argv, const char *prefix); -extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); -extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -extern int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused); -extern int bench_mem_memset(int argc, const char **argv, const char *prefix); -extern int bench_futex_hash(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake_parallel(int argc, const char **argv, - const char *prefix); -extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +int bench_numa(int argc, const char **argv, const char *prefix); 
+int bench_sched_messaging(int argc, const char **argv, const char *prefix); +int bench_sched_pipe(int argc, const char **argv, const char *prefix); +int bench_mem_memcpy(int argc, const char **argv, const char *prefix); +int bench_mem_memset(int argc, const char **argv, const char *prefix); +int bench_futex_hash(int argc, const char **argv, const char *prefix); +int bench_futex_wake(int argc, const char **argv, const char *prefix); +int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); +int bench_futex_requeue(int argc, const char **argv, const char *prefix); /* pi futexes */ -extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index 57b4ed871459..5aad2a9408b0 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) \ - extern void *fn(void *, const void *, size_t); + void *fn(void *, const void *, size_t); #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index e4c2c30143b9..5c3cce082cb8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,6 +1,11 @@ + +/* Various wrappers to make the kernel .S file build in user-space: */ + #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl +#define _ASM_EXTABLE_FAULT(x, y) + #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index 633800cb0dcb..0d15786d9ae3 100644 --- 
a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) \ - extern void *fn(void *, int, size_t); + void *fn(void *, int, size_t); #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5049d6357a46..7500d959d7eb 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -293,7 +293,7 @@ static void bind_to_memnode(int node) if (node == -1) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)); + BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); nodemask = 1L << node; ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cc5c1267c738..814158393656 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -94,7 +94,7 @@ static int process_sample_event(struct perf_tool *tool, struct addr_location al; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -245,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann) hists__collapse_resort(hists, NULL); /* Don't sort callchain */ perf_evsel__reset_sample_bit(pos, CALLCHAIN); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(pos, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d93bff7fc0e4..632efc6b79a0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) static int build_id_cache__kcore_dir(char *dir, size_t sz) { - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || 
!localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); - - return 0; + return fetch_current_timestamp(dir, sz); } static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index f04e804a9fad..c42448ed5dfe 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -13,8 +13,10 @@ #include "util/util.h" #include "util/debug.h" +static bool use_system_config, use_user_config; + static const char * const config_usage[] = { - "perf config [options]", + "perf config [<file-option>] [options]", NULL }; @@ -25,6 +27,8 @@ enum actions { static struct option config_options[] = { OPT_SET_UINT('l', "list", &actions, "show current config variables", ACTION_LIST), + OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"), OPT_END() }; @@ -42,10 +46,23 @@ static int show_config(const char *key, const char *value, int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int ret = 0; + char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (use_system_config && use_user_config) { + pr_err("Error: only one config file at a time\n"); + parse_options_usage(config_usage, config_options, "user", 0); + parse_options_usage(NULL, config_options, "system", 0); + return -1; + } + + if (use_system_config) + config_exclusive_filename = perf_etc_perfconfig(); + else if (use_user_config) + config_exclusive_filename = user_config; + switch (actions) { case ACTION_LIST: if (argc) { @@ -53,9 +70,13 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) parse_options_usage(config_usage, config_options, "l", 1); } else { ret = 
perf_config(show_config, NULL); - if (ret < 0) + if (ret < 0) { + const char * config_filename = config_exclusive_filename; + if (!config_exclusive_filename) + config_filename = user_config; pr_err("Nothing configured, " - "please check your ~/.perfconfig file\n"); + "please check your %s \n", config_filename); + } } break; default: diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 36ccc2b8827f..8053a8ceefda 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -330,7 +330,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, struct hists *hists = evsel__hists(evsel); int ret = -1; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -1264,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (ret < 0) return ret; - perf_config(perf_default_config, NULL); - argc = parse_options(argc, argv, options, diff_usage, 0); if (symbol__init(NULL) < 0) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 96c1a4cfbbbf..bc1de9b8fd67 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -86,8 +86,7 @@ static int check_emacsclient_version(void) return -1; } - strbuf_remove(&buffer, 0, strlen("emacsclient")); - version = atoi(buffer.buf); + version = atoi(buffer.buf + strlen("emacsclient")); if (version < 22) { fprintf(stderr, @@ -107,12 +106,14 @@ static void exec_woman_emacs(const char *path, const char *page) if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. 
*/ - struct strbuf man_page = STRBUF_INIT; + char *man_page; if (!path) path = "emacsclient"; - strbuf_addf(&man_page, "(woman \"%s\")", page); - execlp(path, "emacsclient", "-e", man_page.buf, NULL); + if (asprintf(&man_page, "(woman \"%s\")", page) > 0) { + execlp(path, "emacsclient", "-e", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -123,7 +124,7 @@ static void exec_man_konqueror(const char *path, const char *page) const char *display = getenv("DISPLAY"); if (display && *display) { - struct strbuf man_page = STRBUF_INIT; + char *man_page; const char *filename = "kfmclient"; char sbuf[STRERR_BUFSIZE]; @@ -142,8 +143,10 @@ static void exec_man_konqueror(const char *path, const char *page) filename = file; } else path = "kfmclient"; - strbuf_addf(&man_page, "man:%s(1)", page); - execlp(path, filename, "newTab", man_page.buf, NULL); + if (asprintf(&man_page, "man:%s(1)", page) > 0) { + execlp(path, filename, "newTab", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -162,11 +165,13 @@ static void exec_man_man(const char *path, const char *page) static void exec_man_cmd(const char *cmd, const char *page) { - struct strbuf shell_cmd = STRBUF_INIT; char sbuf[STRERR_BUFSIZE]; + char *shell_cmd; - strbuf_addf(&shell_cmd, "%s %s", cmd, page); - execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); + if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) { + execl("/bin/sh", "sh", "-c", shell_cmd, NULL); + free(shell_cmd); + } warning("failed to exec '%s': %s", cmd, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -273,7 +278,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "man.")) return add_man_viewer_info(var, value); - return perf_default_config(var, value, cb); + return 0; } static struct cmdnames main_cmds, other_cmds; @@ -300,43 +305,33 @@ static int is_perf_command(const 
char *s) is_in_cmdlist(&other_cmds, s); } -static const char *prepend(const char *prefix, const char *cmd) -{ - size_t pre_len = strlen(prefix); - size_t cmd_len = strlen(cmd); - char *p = malloc(pre_len + cmd_len + 1); - memcpy(p, prefix, pre_len); - strcpy(p + pre_len, cmd); - return p; -} - static const char *cmd_to_page(const char *perf_cmd) { + char *s; + if (!perf_cmd) return "perf"; else if (!prefixcmp(perf_cmd, "perf")) return perf_cmd; - else - return prepend("perf-", perf_cmd); + + return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; } static void setup_man_path(void) { - struct strbuf new_path = STRBUF_INIT; + char *new_path; const char *old_path = getenv("MANPATH"); /* We should always put ':' after our path. If there is no * old_path, the ':' at the end will let 'man' to try * system-wide paths after ours to find the manual page. If * there is old_path, we need ':' as delimiter. */ - strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); - strbuf_addch(&new_path, ':'); - if (old_path) - strbuf_addstr(&new_path, old_path); - - setenv("MANPATH", new_path.buf, 1); - - strbuf_release(&new_path); + if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) { + setenv("MANPATH", new_path, 1); + free(new_path); + } else { + error("Unable to setup man path"); + } } static void exec_viewer(const char *name, const char *page) @@ -381,7 +376,7 @@ static int show_info_page(const char *perf_cmd) return -1; } -static int get_html_page_path(struct strbuf *page_path, const char *page) +static int get_html_page_path(char **page_path, const char *page) { struct stat st; const char *html_path = system_path(PERF_HTML_PATH); @@ -393,10 +388,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page) return -1; } - strbuf_init(page_path, 0); - strbuf_addf(page_path, "%s/%s.html", html_path, page); - - return 0; + return asprintf(page_path, "%s/%s.html", html_path, page); } /* @@ -414,12 +406,12 @@ static void open_html(const char 
*path) static int show_html_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); - struct strbuf page_path; /* it leaks but we exec bellow */ + char *page_path; /* it leaks but we exec below */ - if (get_html_page_path(&page_path, page) != 0) + if (get_html_page_path(&page_path, page) < 0) return -1; - open_html(page_path.buf); + open_html(page_path); return 0; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0022e02ed31a..d1a2d104f2bc 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,6 +17,7 @@ #include "util/build-id.h" #include "util/data.h" #include "util/auxtrace.h" +#include "util/jit.h" #include <subcmd/parse-options.h> @@ -29,6 +30,7 @@ struct perf_inject { bool sched_stat; bool have_auxtrace; bool strip; + bool jit_mode; const char *input_name; struct perf_data_file output; u64 bytes_written; @@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +#ifdef HAVE_JITDUMP +static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + return 0; +} +#endif + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -120,8 +131,7 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size) static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, union perf_event *event, - struct perf_session *session - __maybe_unused) + struct perf_session *session) { struct perf_inject *inject = container_of(tool, struct perf_inject, tool); @@ -234,6 +244,31 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +#ifdef HAVE_JITDUMP +static int perf_event__jit_repipe_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, 
struct perf_inject, tool); + u64 n = 0; + int ret; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + ret = jit_process(inject->session, &inject->output, machine, + event->mmap.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -247,6 +282,31 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } +#ifdef HAVE_JITDUMP +static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + int ret; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + ret = jit_process(inject->session, &inject->output, machine, + event->mmap2.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap2(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -356,9 +416,6 @@ static int perf_event__inject_buildid(struct perf_tool *tool, { struct addr_location al; struct thread *thread; - u8 cpumode; - - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -367,7 +424,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { @@ -626,12 +683,16 @@ static int __cmd_inject(struct 
perf_inject *inject) ret = perf_session__process_events(session); if (!file_out->is_pipe) { - if (inject->build_ids) { + if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); - if (inject->have_auxtrace) - dsos__hit_all(session); - } + /* + * Keep all buildids when there is unprocessed AUX data because + * it is not known which ones the AUX trace hits. + */ + if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && + inject->have_auxtrace && !inject->itrace_synth_opts.set) + dsos__hit_all(session); /* * The AUX areas have been removed and replaced with * synthesized hardware events, so clear the feature flag and @@ -703,7 +764,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) }; int ret; - const struct option options[] = { + struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", @@ -713,6 +774,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), +#ifdef HAVE_JITDUMP + OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), +#endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", @@ -729,7 +793,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject [<options>]", NULL }; - +#ifndef HAVE_JITDUMP + set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); +#endif argc = parse_options(argc, argv, options, inject_usage, 0); /* @@ -755,6 +821,29 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; + if (inject.build_ids) { + /* + * to make sure the mmap records are ordered correctly + * 
and so that the correct map is found, especially due to jitted code + * mmaps. We cannot generate the buildid hit list and + * inject the jit mmaps at the same time for now. + */ + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + } +#ifdef HAVE_JITDUMP + if (inject.jit_mode) { + inject.tool.mmap2 = perf_event__jit_repipe_mmap2; + inject.tool.mmap = perf_event__jit_repipe_mmap; + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + /* + * JIT MMAP injection injects all MMAP events in one go, so it + * does not obey finished_round semantics. + */ + inject.tool.finished_round = perf_event__drop_oe; + } +#endif ret = symbol__init(&inject.session->header.env); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 118010553d0c..c9cb3be47cff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b) return fa->flags - fb->flags; } -/* see include/trace/events/gfpflags.h */ +/* see include/trace/events/mmflags.h */ static const struct { const char *original; const char *compact; @@ -612,30 +612,39 @@ static const struct { { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, { "GFP_TEMPORARY", "TMP" }, + { "GFP_KERNEL_ACCOUNT", "KAC" }, { "GFP_KERNEL", "K" }, { "GFP_NOFS", "NF" }, { "GFP_ATOMIC", "A" }, { "GFP_NOIO", "NI" }, - { "GFP_HIGH", "H" }, - { "GFP_WAIT", "W" }, - { "GFP_IO", "I" }, - { "GFP_COLD", "CO" }, - { "GFP_NOWARN", "NWR" }, - { "GFP_REPEAT", "R" }, - { "GFP_NOFAIL", "NF" }, - { "GFP_NORETRY", "NR" }, - { "GFP_COMP", "C" }, - { "GFP_ZERO", "Z" }, - { "GFP_NOMEMALLOC", "NMA" }, - { "GFP_MEMALLOC", "MA" }, - { "GFP_HARDWALL", "HW" }, - { "GFP_THISNODE", "TN" }, - { "GFP_RECLAIMABLE", "RC" }, - { "GFP_MOVABLE", "M" }, - { "GFP_NOTRACK", "NT" }, - { "GFP_NO_KSWAPD", "NK" }, - { "GFP_OTHER_NODE", "ON" }, { "GFP_NOWAIT", "NW" }, + { "GFP_DMA", "D" }, + { "__GFP_HIGHMEM", "HM" }, + { 
"GFP_DMA32", "D32" }, + { "__GFP_HIGH", "H" }, + { "__GFP_ATOMIC", "_A" }, + { "__GFP_IO", "I" }, + { "__GFP_FS", "F" }, + { "__GFP_COLD", "CO" }, + { "__GFP_NOWARN", "NWR" }, + { "__GFP_REPEAT", "R" }, + { "__GFP_NOFAIL", "NF" }, + { "__GFP_NORETRY", "NR" }, + { "__GFP_COMP", "C" }, + { "__GFP_ZERO", "Z" }, + { "__GFP_NOMEMALLOC", "NMA" }, + { "__GFP_MEMALLOC", "MA" }, + { "__GFP_HARDWALL", "HW" }, + { "__GFP_THISNODE", "TN" }, + { "__GFP_RECLAIMABLE", "RC" }, + { "__GFP_MOVABLE", "M" }, + { "__GFP_ACCOUNT", "AC" }, + { "__GFP_NOTRACK", "NT" }, + { "__GFP_WRITE", "WR" }, + { "__GFP_RECLAIM", "R" }, + { "__GFP_DIRECT_RECLAIM", "DR" }, + { "__GFP_KSWAPD_RECLAIM", "KR" }, + { "__GFP_OTHER_NODE", "ON" }, }; static size_t max_gfp_len; @@ -1834,7 +1843,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int kmem_config(const char *var, const char *value, void *cb) +static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "kmem.default")) { if (!strcmp(value, "slab")) @@ -1847,7 +1856,7 @@ static int kmem_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4418d9214872..bff666458b28 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -30,7 +30,6 @@ #include <math.h> #ifdef HAVE_KVM_STAT_SUPPORT -#include <asm/kvm_perf.h> #include "util/kvm-stat.h" void exit_event_get_key(struct perf_evsel *evsel, @@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON); + key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_EXIT_TRACE); + return 
!strcmp(evsel->name, kvm_exit_trace); } bool exit_event_begin(struct perf_evsel *evsel, @@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel, bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_ENTRY_TRACE); + return !strcmp(evsel->name, kvm_entry_trace); } bool exit_event_end(struct perf_evsel *evsel, @@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, key->key); - scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason); + scnprintf(decode, decode_str_len, "%s", exit_reason); } static bool register_kvm_events_ops(struct perf_kvm_stat *kvm) @@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, time_diff = sample->time - time_begin; if (kvm->duration && time_diff > kvm->duration) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; kvm->events_ops->decode_key(kvm, &event->key, decode); if (!skip_event(decode)) { @@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID); + vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, + vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -574,7 +574,7 @@ static void show_timeofday(void) static void print_result(struct perf_kvm_stat *kvm) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; struct kvm_event *event; int vcpu = kvm->trace_vcpu; @@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\n\n"); print_vcpu_info(kvm); - pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name); + pr_info("%*s ", decode_str_len, kvm->events_ops->name); pr_info("%10s ", "Samples"); pr_info("%9s ", "Samples%"); @@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm) min = get_event_min(event, vcpu); kvm->events_ops->decode_key(kvm, &event->key, decode); - pr_info("%*s ", DECODE_STR_LEN, decode); + pr_info("%*s ", 
decode_str_len, decode); pr_info("%10llu ", (unsigned long long)ecount); pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); @@ -1132,6 +1132,11 @@ exit: _p; \ }) +int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused) +{ + return 0; +} + static int kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { @@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) NULL }; const char * const *events_tp; + int ret; + events_tp_size = 0; + ret = setup_kvm_events_tp(kvm); + if (ret < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return ret; + } for (events_tp = kvm_events_tp; *events_tp; events_tp++) events_tp_size++; @@ -1377,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * generate the event list */ + err = setup_kvm_events_tp(kvm); + if (err < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return err; + } + kvm->evlist = kvm_live_event_list(); if (kvm->evlist == NULL) { err = -1; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 390170041696..85db3be4b3cb 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -6,6 +6,8 @@ #include "util/tool.h" #include "util/session.h" #include "util/data.h" +#include "util/mem-events.h" +#include "util/debug.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -21,11 +23,56 @@ struct perf_mem { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int parse_record_events(const struct option *opt, + const char *str, int unset __maybe_unused) +{ + struct perf_mem *mem = *(struct perf_mem **)opt->value; + int j; + + if (strcmp(str, "list")) { + if (!perf_mem_events__parse(str)) { + mem->operation = 0; + return 0; + } + exit(-1); + } + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + fprintf(stderr, "%-13s%-*s%s\n", + e->tag, + verbose ? 
25 : 0, + verbose ? perf_mem_events__name(j) : "", + e->supported ? ": available" : ""); + } + exit(0); +} + +static const char * const __usage[] = { + "perf mem record [<options>] [<command>]", + "perf mem record [<options>] -- <command> [<options>]", + NULL +}; + +static const char * const *record_mem_usage = __usage; + static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; int ret; + struct option options[] = { + OPT_CALLBACK('e', "event", &mem, "event", + "event selector. use 'perf mem record -e list' to list available events", + parse_record_events), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + + argc = parse_options(argc, argv, options, record_mem_usage, + PARSE_OPT_STOP_AT_NON_OPTION); rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -35,23 +82,40 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; if (mem->operation & MEM_OPERATION_LOAD) + perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; - if (mem->operation & MEM_OPERATION_LOAD) { - rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-loads/pp"; - } + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + if (!perf_mem_events[j].record) + continue; + + if (!perf_mem_events[j].supported) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events__name(j)); + return -1; + } - if (mem->operation & MEM_OPERATION_STORE) { rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-stores/pp"; - } + rec_argv[i++] = perf_mem_events__name(j); + }; - for (j = 1; j < argc; j++, i++) + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; + if (verbose > 0) { + pr_debug("calling: record "); + + while (rec_argv[j]) { + pr_debug("%s ", rec_argv[j]); + j++; + } + pr_debug("\n"); + } + ret = 
cmd_record(i, rec_argv, NULL); free(rec_argv); return ret; @@ -67,7 +131,7 @@ dump_raw_samples(struct perf_tool *tool, struct addr_location al; const char *fmt; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -298,6 +362,10 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 319712a4e02b..515510ecc76a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -32,6 +32,8 @@ #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" +#include "util/bpf-loader.h" +#include "asm/bug.h" #include <unistd.h> #include <sched.h> @@ -49,7 +51,9 @@ struct record { const char *progname; int realtime_prio; bool no_buildid; + bool no_buildid_set; bool no_buildid_cache; + bool no_buildid_cache_set; bool buildid_all; unsigned long long samples; }; @@ -320,7 +324,10 @@ try_again: } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror_r(errno, msg, sizeof(msg))); - rc = -errno; + if (errno) + rc = -errno; + else + rc = -EINVAL; } goto out; } @@ -464,6 +471,29 @@ static void record__init_features(struct record *rec) perf_header__clear_feat(&session->header, HEADER_STAT); } +static void +record__finish_output(struct record *rec) +{ + struct perf_data_file *file = &rec->file; + int fd = perf_data_file__fd(file); + + if (file->is_pipe) + return; + + rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + + if (!rec->no_buildid) { + process_buildids(rec); + + 
if (rec->buildid_all) + dsos__hit_all(rec->session); + } + perf_session__write_header(rec->session, rec->evlist, fd, true); + + return; +} + static volatile int workload_exec_errno; /* @@ -482,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +static int record__synthesize(struct record *rec) +{ + struct perf_session *session = rec->session; + struct machine *machine = &session->machines.host; + struct perf_data_file *file = &rec->file; + struct record_opts *opts = &rec->opts; + struct perf_tool *tool = &rec->tool; + int fd = perf_data_file__fd(file); + int err = 0; + + if (file->is_pipe) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + goto out; + } + + if (have_tracepoints(&rec->evlist->entries)) { + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, + process_synthesized_event); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + goto out; + } + rec->bytes_written += err; + } + } + + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out; + } + + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } + + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); +out: + return err; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -534,6 +632,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_child; + } + /* * Normally perf_session__new would do this, but it doesn't have the * evlist. @@ -566,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - if (file->is_pipe) { - err = perf_event__synthesize_attrs(tool, session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out_child; - } - - if (have_tracepoints(&rec->evlist->entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. 
We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out_child; - } - rec->bytes_written += err; - } - } - - if (rec->opts.full_auxtrace) { - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, - session, process_synthesized_event); - if (err) - goto out_delete_session; - } - - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - if (err < 0) - pr_err("Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); + err = record__synthesize(rec); if (err < 0) - pr_err("Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" - "Check /proc/modules permission or run as root.\n"); - - if (perf_guest) { - machines__process_guests(&session->machines, - perf_event__synthesize_guest_os, tool); - } - - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); - if (err != 0) goto out_child; if (rec->realtime_prio) { @@ -758,18 +811,8 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err && !file->is_pipe) { - rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - - if (!rec->no_buildid) { - process_buildids(rec); - - if (rec->buildid_all) - dsos__hit_all(rec->session); - } - perf_session__write_header(rec->session, rec->evlist, fd, true); - } + if (!err) + record__finish_output(rec); if (!err && !quiet) { char samples[128]; @@ -1097,10 +1140,12 @@ struct option __record_options[] = { OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, - "do not update the buildid cache"), - OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, - "do not collect buildids in perf.data"), + OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, + &record.no_buildid_cache_set, + "do not update the buildid cache"), + OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, + &record.no_buildid_set, + "do not collect buildids in perf.data"), OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), @@ -1136,6 +1181,12 @@ struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), + OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, + "Configure 
all used events to run in kernel space.", + PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, + "Configure all used events to run in user space.", + PARSE_OPT_EXCLUSIVE), OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", "clang binary to use for compiling BPF scriptlets"), OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2bf537f190a0..160ea23b45aa 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -41,6 +41,7 @@ #include <dlfcn.h> #include <linux/bitmap.h> +#include <linux/stringify.h> struct report { struct perf_tool tool; @@ -75,7 +76,10 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } if (!strcmp(var, "report.percent-limit")) { - rep->min_percent = strtof(value, NULL); + double pcnt = strtof(value, NULL); + + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } if (!strcmp(var, "report.children")) { @@ -87,7 +91,7 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int hist_iter__report_callback(struct hist_entry_iter *iter, @@ -151,7 +155,7 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -466,10 +470,11 @@ static int report__browse_hists(struct report *rep) return ret; } -static void report__collapse_hists(struct report *rep) +static int report__collapse_hists(struct report *rep) { struct ui_progress prog; struct perf_evsel *pos; + int ret = 0; ui_progress__init(&prog, rep->nr_entries, "Merging related events..."); @@ -481,7 +486,9 @@ static void report__collapse_hists(struct report *rep) 
hists->socket_filter = rep->socket_filter; - hists__collapse_resort(hists, &prog); + ret = hists__collapse_resort(hists, &prog); + if (ret < 0) + break; /* Non-group events are considered as leader */ if (symbol_conf.event_group && @@ -494,6 +501,7 @@ static void report__collapse_hists(struct report *rep) } ui_progress__finish(); + return ret; } static void report__output_resort(struct report *rep) @@ -504,7 +512,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); evlist__for_each(rep->session->evlist, pos) - hists__output_resort(evsel__hists(pos), &prog); + perf_evsel__output_resort(pos, &prog); ui_progress__finish(); } @@ -561,7 +569,11 @@ static int __cmd_report(struct report *rep) } } - report__collapse_hists(rep); + ret = report__collapse_hists(rep); + if (ret) { + ui__error("failed to process hist entry\n"); + return ret; + } if (session_done()) return 0; @@ -633,8 +645,10 @@ parse_percent_limit(const struct option *opt, const char *str, int unset __maybe_unused) { struct report *rep = opt->value; + double pcnt = strtof(str, NULL); - rep->min_percent = strtof(str, NULL); + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } @@ -798,6 +812,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "only show processor socket that match with this filter"), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), OPT_END() }; struct perf_data_file file = { @@ -907,13 +923,19 @@ repeat: symbol_conf.cumulate_callchain = false; } - if (setup_sorting(session->evlist) < 0) { - if (sort_order) - parse_options_usage(report_usage, options, "s", 1); - if (field_order) - parse_options_usage(sort_order ? 
NULL : report_usage, - options, "F", 1); - goto error; + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(report_usage, options, "F", 1); + parse_options_usage(NULL, options, "hierarchy", 0); + goto error; + } + + sort__need_collapse = true; } /* Force tty output for header output and per-thread stat. */ @@ -925,6 +947,15 @@ repeat: else use_browser = 0; + if (setup_sorting(session->evlist) < 0) { + if (sort_order) + parse_options_usage(report_usage, options, "s", 1); + if (field_order) + parse_options_usage(sort_order ? NULL : report_usage, + options, "F", 1); + goto error; + } + if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c691214d820f..3770c3dffe5e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -23,6 +23,7 @@ #include "util/stat.h" #include <linux/bitmap.h> #include "asm/bug.h" +#include "util/mem-events.h" static char const *script_name; static char const *generate_script_lang; @@ -58,6 +59,9 @@ enum perf_output_field { PERF_OUTPUT_IREGS = 1U << 14, PERF_OUTPUT_BRSTACK = 1U << 15, PERF_OUTPUT_BRSTACKSYM = 1U << 16, + PERF_OUTPUT_DATA_SRC = 1U << 17, + PERF_OUTPUT_WEIGHT = 1U << 18, + PERF_OUTPUT_BPF_OUTPUT = 1U << 19, }; struct output_option { @@ -81,6 +85,9 @@ struct output_option { {.str = "iregs", .field = PERF_OUTPUT_IREGS}, {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, + {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, + {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, + {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, }; /* default set to maintain compatibility with current format */ @@ -101,7 
+108,7 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, [PERF_TYPE_SOFTWARE] = { @@ -111,7 +118,7 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -121,7 +128,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, [PERF_TYPE_RAW] = { @@ -131,9 +138,10 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | + PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, [PERF_TYPE_BREAKPOINT] = { @@ -145,7 +153,7 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, }; @@ -242,6 +250,16 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; + if (PRINT_FIELD(DATA_SRC) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", + PERF_OUTPUT_DATA_SRC)) + return -EINVAL; + + if (PRINT_FIELD(WEIGHT) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", + PERF_OUTPUT_WEIGHT)) + return -EINVAL; + if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { pr_err("Display of symbols requested but neither sample IP nor " "sample address\nis selected. 
Hence, no addresses to convert " @@ -387,9 +405,7 @@ out: return 0; } -static void print_sample_iregs(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, +static void print_sample_iregs(struct perf_sample *sample, struct perf_event_attr *attr) { struct regs_dump *regs = &sample->intr_regs; @@ -458,10 +474,7 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstack(struct perf_sample *sample) { struct branch_stack *br = sample->branch_stack; u64 i; @@ -480,14 +493,11 @@ static void print_sample_brstack(union perf_event *event __maybe_unused, } } -static void print_sample_brstacksym(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstacksym(struct perf_sample *sample, + struct thread *thread) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; u64 i, from, to; if (!(br && br->nr)) @@ -500,11 +510,11 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); if (alf.map) alf.sym = map__find_symbol(alf.map, alf.addr, NULL); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); if (alt.map) alt.sym = map__find_symbol(alt.map, alt.addr, NULL); @@ -520,8 +530,7 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, } 
-static void print_sample_addr(union perf_event *event, - struct perf_sample *sample, +static void print_sample_addr(struct perf_sample *sample, struct thread *thread, struct perf_event_attr *attr) { @@ -532,7 +541,7 @@ static void print_sample_addr(union perf_event *event, if (!sample_addr_correlates_sym(attr)) return; - perf_event__preprocess_sample_addr(event, sample, thread, &al); + thread__resolve(thread, &al, sample); if (PRINT_FIELD(SYM)) { printf(" "); @@ -549,8 +558,7 @@ static void print_sample_addr(union perf_event *event, } } -static void print_sample_bts(union perf_event *event, - struct perf_sample *sample, +static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al) @@ -580,7 +588,7 @@ static void print_sample_bts(union perf_event *event, ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[attr->type].user_set)) { printf(" => "); - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); } if (print_srcline_last) @@ -608,6 +616,84 @@ static void print_sample_flags(u32 flags) printf(" %-4s ", str); } +struct printer_data { + int line_no; + bool hit_nul; + bool is_printable; +}; + +static void +print_sample_bpf_output_printer(enum binary_printer_ops op, + unsigned int val, + void *extra) +{ + unsigned char ch = (unsigned char)val; + struct printer_data *printer_data = extra; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("\n"); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("%17s", !printer_data->line_no ? 
"BPF output:" : + " "); + break; + case BINARY_PRINT_ADDR: + printf(" %04x:", val); + break; + case BINARY_PRINT_NUM_DATA: + printf(" %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + printf(" "); + break; + case BINARY_PRINT_SEP: + printf(" "); + break; + case BINARY_PRINT_CHAR_DATA: + if (printer_data->hit_nul && ch) + printer_data->is_printable = false; + + if (!isprint(ch)) { + printf("%c", '.'); + + if (!printer_data->is_printable) + break; + + if (ch == '\0') + printer_data->hit_nul = true; + else + printer_data->is_printable = false; + } else { + printf("%c", ch); + } + break; + case BINARY_PRINT_CHAR_PAD: + printf(" "); + break; + case BINARY_PRINT_LINE_END: + printf("\n"); + printer_data->line_no++; + break; + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void print_sample_bpf_output(struct perf_sample *sample) +{ + unsigned int nr_bytes = sample->raw_size; + struct printer_data printer_data = {0, false, true}; + + print_binary(sample->raw_data, nr_bytes, 8, + print_sample_bpf_output_printer, &printer_data); + + if (printer_data.is_printable && printer_data.hit_nul) + printf("%17s \"%s\"\n", "BPF string:", + (char *)(sample->raw_data)); +} + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -634,7 +720,24 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist) return max; } -static void process_event(struct perf_script *script, union perf_event *event, +static size_t data_src__printf(u64 data_src) +{ + struct mem_info mi = { .data_src.val = data_src }; + char decode[100]; + char out[100]; + static int maxlen; + int len; + + perf_script__meminfo_scnprintf(decode, 100, &mi); + + len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode); + if (maxlen < len) + maxlen = len; + + return printf("%-*s", maxlen, out); +} + +static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { @@ -663,7 +766,7 @@ static void 
process_event(struct perf_script *script, union perf_event *event, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(event, sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al); return; } @@ -671,7 +774,13 @@ static void process_event(struct perf_script *script, union perf_event *event, event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); + + if (PRINT_FIELD(DATA_SRC)) + data_src__printf(sample->data_src); + + if (PRINT_FIELD(WEIGHT)) + printf("%16" PRIu64, sample->weight); if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) @@ -685,12 +794,15 @@ static void process_event(struct perf_script *script, union perf_event *event, } if (PRINT_FIELD(IREGS)) - print_sample_iregs(event, sample, thread, attr); + print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(event, sample, thread, attr); + print_sample_brstack(sample); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(event, sample, thread, attr); + print_sample_brstacksym(sample, thread); + + if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) + print_sample_bpf_output(sample); printf("\n"); } @@ -783,7 +895,7 @@ static int process_sample_event(struct perf_tool *tool, return 0; } - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -798,7 +910,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, event, sample, evsel, &al); + process_event(scr, sample, evsel, &al); out_put: addr_location__put(&al); @@ -1090,23 +1202,6 @@ static struct script_spec *script_spec__find(const char *spec) return NULL; } 
-static struct script_spec *script_spec__findnew(const char *spec, - struct scripting_ops *ops) -{ - struct script_spec *s = script_spec__find(spec); - - if (s) - return s; - - s = script_spec__new(spec, ops); - if (!s) - return NULL; - - script_spec__add(s); - - return s; -} - int script_spec_register(const char *spec, struct scripting_ops *ops) { struct script_spec *s; @@ -1115,9 +1210,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops) if (s) return -1; - s = script_spec__findnew(spec, ops); + s = script_spec__new(spec, ops); if (!s) return -1; + else + script_spec__add(s); return 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 038e877081b6..1f19f2f999c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -122,6 +122,7 @@ static bool sync_run = false; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; +static bool metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -735,6 +736,191 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) } } +struct outstate { + FILE *fh; + bool newline; + const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; +}; + +#define METRIC_LEN 35 + +static void new_line_std(void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); + if (stat_config.aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); 
+ return; + } + + if (newline) + do_new_line_std(os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + +static void new_line_csv(void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(csv_sep, os->fh); +} + +static void print_metric_csv(void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); +} + +#define METRIC_ONLY_LEN 20 + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + char buf[1024]; + unsigned mlen = METRIC_ONLY_LEN; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (color) + n = 
color_fprintf(out, color, fmt, val); + else + n = fprintf(out, fmt, val); + if (n > METRIC_ONLY_LEN) + n = METRIC_ONLY_LEN; + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + fprintf(out, "%*s", mlen - n, ""); +} + +static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, csv_sep); +} + +static void new_line_metric(void *ctx __maybe_unused) +{ +} + +static void print_metric_header(void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (csv_output) + fprintf(os->fh, "%s%s", unit, csv_sep); + else + fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -763,6 +949,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { 
FILE *output = stat_config.output; @@ -793,22 +1001,124 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } -static void printout(int id, int nr, struct perf_evsel *counter, double uval) +static void printout(int id, int nr, struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise) { - int cpu = cpu_map__id_to_cpu(id); + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = stat_config.output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, + }; + print_metric_t pm = print_metric_std; + void (*nl)(void *); - if (stat_config.aggr_mode == AGGR_GLOBAL) - cpu = 0; + if (metric_only) { + nl = new_line_metric; + if (csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; + + if (csv_output && !metric_only) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[stat_config.aggr_mode]; + if (counter->cgrp) + os.nfields++; + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (metric_only) { + pm(&os, NULL, "", "", 0); + return; + } + aggr_printout(counter, id, nr); + + fprintf(stat_config.output, "%*s%s", + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep); + + fprintf(stat_config.output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(stat_config.output, "%*s", + csv_output ? 
0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(stat_config.output, "%s%s", + csv_sep, counter->cgrp->name); - if (nsec_counter(counter)) + if (!csv_output) + pm(&os, NULL, NULL, "", 0); + print_noise(counter, noise); + print_running(run, ena); + if (csv_output) + pm(&os, NULL, NULL, "", 0); + return; + } + + if (metric_only) + /* nothing */; + else if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); - if (!csv_output && !stat_config.interval) - perf_stat__print_shadow_stats(stat_config.output, counter, - uval, cpu, - stat_config.aggr_mode); + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + + if (csv_output && !metric_only) { + print_noise(counter, noise); + print_running(run, ena); + } + + perf_stat__print_shadow_stats(counter, uval, + first_shadow_cpu(counter, id), + &out); + if (!csv_output && !metric_only) { + print_noise(counter, noise); + print_running(run, ena); + } +} + +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } } static void print_aggr(char *prefix) @@ -818,12 +1128,23 @@ static void print_aggr(char *prefix) int cpu, s, s2, id, nr; double uval; u64 ena, run, val; + bool first; if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. 
+ */ for (s = 0; s < aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + id = aggr_map->map[s]; + first = true; evlist__for_each(evsel_list, counter) { val = ena = run = 0; nr = 0; @@ -836,41 +1157,20 @@ static void print_aggr(char *prefix) run += perf_counts(counter->counts, cpu, 0)->run; nr++; } - if (prefix) - fprintf(output, "%s", prefix); - - if (run == 0 || ena == 0) { + if (first && metric_only) { + first = false; aggr_printout(counter, id, nr); - - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; } - uval = val * counter->scale; - printout(id, nr, counter, uval); - if (!csv_output) - print_noise(counter, 1.0); + if (prefix && !metric_only) + fprintf(output, "%s", prefix); - print_running(run, ena); - fputc('\n', output); + uval = val * counter->scale; + printout(id, nr, counter, uval, prefix, run, ena, 1.0); + if (!metric_only) + fputc('\n', output); } + if (metric_only) + fputc('\n', output); } } @@ -895,12 +1195,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval); - - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -914,43 +1209,19 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) FILE *output = stat_config.output; struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); - int scaled = counter->counts->scaled; double uval; double avg_enabled, 
avg_running; avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - if (prefix) + if (prefix && !metric_only) fprintf(output, "%s", prefix); - if (scaled == -1 || !counter->supported) { - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - - print_running(avg_running, avg_enabled); - fputc('\n', output); - return; - } - uval = avg * counter->scale; - printout(-1, 0, counter, uval); - - print_noise(counter, avg); - - print_running(avg_running, avg_enabled); - fprintf(output, "\n"); + printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + if (!metric_only) + fprintf(output, "\n"); } /* @@ -972,39 +1243,78 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s", - csv_output ? 0 : -4, - perf_evsel__cpus(counter)->map[cpu], csv_sep, - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); + fputc('\n', output); + } +} - fprintf(output, "%*s", - csv_output ? 
0 : -25, - perf_evsel__name(counter)); +static void print_no_aggr_metric(char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); + nrcpus = evsel_list->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; - print_running(run, ena); - fputc('\n', output); - continue; + if (prefix) + fputs(prefix, stat_config.output); + evlist__for_each(evsel_list, counter) { + if (first) { + aggr_printout(counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); } + fputc('\n', stat_config.output); + } +} - uval = val * counter->scale; - printout(cpu, 0, counter, uval); - if (!csv_output) - print_noise(counter, 1.0); - print_running(run, ena); +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; - fputc('\n', output); +static void print_metric_headers(char *prefix) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = stat_config.output + }; + + if (prefix) + fprintf(stat_config.output, "%s", prefix); + + if (!csv_output) + fprintf(stat_config.output, "%*s", + aggr_header_lens[stat_config.aggr_mode], ""); + + /* Print metrics headers only */ + evlist__for_each(evsel_list, counter) { + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + os.evsel = counter; + perf_stat__print_shadow_stats(counter, 0, + 0, + &out); } + fputc('\n', stat_config.output); } static void print_interval(char *prefix, struct timespec *ts) @@ -1014,7 +1324,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, 
"%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if (num_print_interval == 0 && !csv_output && !metric_only) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); @@ -1101,6 +1411,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0) + print_metric_headers(prefix); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) + fprintf(stat_config.output, "%s", prefix); + } + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: @@ -1113,10 +1434,16 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); + if (metric_only) + fputc('\n', stat_config.output); break; case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); + if (metric_only) + print_no_aggr_metric(prefix); + else { + evlist__for_each(evsel_list, counter) + print_counter(counter, prefix); + } break; case AGGR_UNSET: default: @@ -1237,6 +1564,8 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), + OPT_BOOLEAN(0, "metric-only", &metric_only, + "Only print computed metrics. 
No raw values"), OPT_END() }; @@ -1435,7 +1764,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) */ static int add_default_attributes(void) { - struct perf_event_attr default_attrs[] = { + struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, @@ -1443,8 +1772,14 @@ static int add_default_attributes(void) { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, +}; + struct perf_event_attr frontend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +}; + struct perf_event_attr backend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + struct perf_event_attr default_attrs1[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, @@ -1561,7 +1896,19 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + return -1; + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { + if (perf_evlist__add_default_attrs(evsel_list, + frontend_attrs) < 0) + return -1; + } + if (pmu_have_event("cpu", "stalled-cycles-backend")) { + if (perf_evlist__add_default_attrs(evsel_list, + backend_attrs) < 0) + return -1; + } + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1825,9 +2172,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (evsel_list == NULL) return -ENOMEM; + parse_events__shrink_config_terms(); argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 
(const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__init_shadow_stats(); if (csv_sep) { csv_output = true; @@ -1858,6 +2207,16 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + fprintf(stderr, "--metric-only is not supported with --per-thread\n"); + goto out; + } + + if (metric_only && run_count > 1) { + fprintf(stderr, "--metric-only is not supported with -r\n"); + goto out; + } + if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); parse_options_usage(stat_usage, stat_options, "log-fd", 0); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index bd7a7757176f..40cc9bb3506c 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -489,7 +489,7 @@ static const char *cat_backtrace(union perf_event *event, if (!chain) goto exit; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); goto exit; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bf01cbb0ef23..833214979c4f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -67,6 +67,7 @@ #include <sys/utsname.h> #include <sys/mman.h> +#include <linux/stringify.h> #include <linux/types.h> static volatile int done; @@ -252,7 +253,8 @@ static void perf_top__print_sym_table(struct perf_top *top) char bf[160]; int printed = 0; const int win_width = top->winsize.ws_col - 1; - struct hists *hists = evsel__hists(top->sym_evsel); + struct perf_evsel *evsel = top->sym_evsel; + struct hists *hists = evsel__hists(evsel); puts(CONSOLE_CLEAR); @@ -288,7 +290,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); 
hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -540,6 +542,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) static void perf_top__sort_new_samples(void *arg) { struct perf_top *t = arg; + struct perf_evsel *evsel = t->sym_evsel; struct hists *hists; perf_top__reset_sample_counters(t); @@ -547,7 +550,7 @@ static void perf_top__sort_new_samples(void *arg) if (t->evlist->selected != NULL) t->sym_evsel = t->evlist->selected; - hists = evsel__hists(t->sym_evsel); + hists = evsel__hists(evsel); if (t->evlist->enabled) { if (t->zero) { @@ -559,7 +562,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); } static void *display_thread_tui(void *arg) @@ -726,7 +729,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) top->exact_samples++; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) + if (machine__resolve(machine, &al, sample) < 0) return; if (!top->kptr_restrict_warned && @@ -807,7 +810,6 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) struct perf_session *session = top->session; union perf_event *event; struct machine *machine; - u8 origin; int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { @@ -820,12 +822,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) evsel = perf_evlist__id2evsel(session->evlist, sample.id); assert(evsel != NULL); - origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (event->header.type == PERF_RECORD_SAMPLE) ++top->samples; - switch (origin) { + switch (sample.cpumode) { case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) @@ -1063,7 +1063,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_top_opt(arg); } -static int 
perf_top_config(const char *var, const char *value, void *cb) +static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "top.call-graph")) var = "call-graph.record-mode"; /* fall-through */ @@ -1072,7 +1072,7 @@ static int perf_top_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int @@ -1212,6 +1212,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) parse_branch_stack), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), OPT_END() }; const char * const top_usage[] = { @@ -1239,10 +1241,30 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(top_usage, options, "fields", 0); + parse_options_usage(NULL, options, "hierarchy", 0); + goto out_delete_evlist; + } + } + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ sort__need_collapse = 1; + if (top.use_stdio) + use_browser = 0; + else if (top.use_tui) + use_browser = 1; + + setup_browser(false); + if (setup_sorting(top.evlist) < 0) { if (sort_order) parse_options_usage(top_usage, options, "s", 1); @@ -1252,13 +1274,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } - if (top.use_stdio) - use_browser = 0; - else if (top.use_tui) - use_browser = 1; - - setup_browser(false); - status = target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 20916dd77aac..93ac724fb635 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -33,6 +33,7 @@ #include "util/stat.h" #include "trace-event.h" #include "util/parse-events.h" +#include "util/bpf-loader.h" #include <libaudit.h> #include <stdlib.h> @@ -1724,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->args = sc->tp_format->format.fields; sc->nr_args = sc->tp_format->format.nr_fields; - /* drop nr field - not relevant here; does not exist on older kernels */ - if (sc->args && strcmp(sc->args->name, "nr") == 0) { + /* + * We need to check and discard the first variable '__syscall_nr' + * or 'nr' that mean the syscall number. It is needless here. + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. + */ + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { sc->args = sc->args->next; --sc->nr_args; } @@ -2177,6 +2182,37 @@ out_dump: return 0; } +static void bpf_output__printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + FILE *output = extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_CHAR_DATA: + fprintf(output, "%c", isprint(ch) ? 
ch : '.'); + break; + case BINARY_PRINT_DATA_BEGIN: + case BINARY_PRINT_LINE_BEGIN: + case BINARY_PRINT_ADDR: + case BINARY_PRINT_NUM_DATA: + case BINARY_PRINT_NUM_PAD: + case BINARY_PRINT_SEP: + case BINARY_PRINT_CHAR_PAD: + case BINARY_PRINT_LINE_END: + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void bpf_output__fprintf(struct trace *trace, + struct perf_sample *sample) +{ + print_binary(sample->raw_data, sample->raw_size, 8, + bpf_output__printer, trace->output); +} + static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2189,7 +2225,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, "%s:", evsel->name); - if (evsel->tp_format) { + if (perf_evsel__is_bpf_output(evsel)) { + bpf_output__fprintf(trace, sample); + } else if (evsel->tp_format) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); @@ -2218,11 +2256,10 @@ static void print_location(FILE *f, struct perf_sample *sample, static int trace__pgfault(struct trace *trace, struct perf_evsel *evsel, - union perf_event *event, + union perf_event *event __maybe_unused, struct perf_sample *sample) { struct thread *thread; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct addr_location al; char map_type = 'd'; struct thread_trace *ttrace; @@ -2241,7 +2278,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); @@ -2254,11 +2291,11 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, cpumode, MAP__VARIABLE, + thread__find_addr_location(thread, 
sample->cpumode, MAP__VARIABLE, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, cpumode, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->addr, &al); if (al.map) @@ -2586,6 +2623,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_error_open; + } + /* * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3f871b54e261..41c24010ab43 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -7,38 +7,38 @@ extern const char perf_usage_string[]; extern const char perf_more_info_string[]; -extern void list_common_cmds_help(void); -extern const char *help_unknown_cmd(const char *cmd); -extern void prune_packed_objects(int); -extern int read_line_with_nul(char *buf, int size, FILE *file); -extern int check_pager_config(const char *cmd); +void list_common_cmds_help(void); +const char *help_unknown_cmd(const char *cmd); +void prune_packed_objects(int); +int read_line_with_nul(char *buf, int size, FILE *file); +int check_pager_config(const char *cmd); -extern int cmd_annotate(int argc, const char **argv, const char *prefix); -extern int cmd_bench(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); -extern int cmd_config(int argc, const char **argv, const char *prefix); -extern int cmd_diff(int argc, const char **argv, const char *prefix); -extern int cmd_evlist(int argc, const char **argv, const char *prefix); -extern int cmd_help(int argc, const char **argv, 
const char *prefix); -extern int cmd_sched(int argc, const char **argv, const char *prefix); -extern int cmd_list(int argc, const char **argv, const char *prefix); -extern int cmd_record(int argc, const char **argv, const char *prefix); -extern int cmd_report(int argc, const char **argv, const char *prefix); -extern int cmd_stat(int argc, const char **argv, const char *prefix); -extern int cmd_timechart(int argc, const char **argv, const char *prefix); -extern int cmd_top(int argc, const char **argv, const char *prefix); -extern int cmd_script(int argc, const char **argv, const char *prefix); -extern int cmd_version(int argc, const char **argv, const char *prefix); -extern int cmd_probe(int argc, const char **argv, const char *prefix); -extern int cmd_kmem(int argc, const char **argv, const char *prefix); -extern int cmd_lock(int argc, const char **argv, const char *prefix); -extern int cmd_kvm(int argc, const char **argv, const char *prefix); -extern int cmd_test(int argc, const char **argv, const char *prefix); -extern int cmd_trace(int argc, const char **argv, const char *prefix); -extern int cmd_inject(int argc, const char **argv, const char *prefix); -extern int cmd_mem(int argc, const char **argv, const char *prefix); -extern int cmd_data(int argc, const char **argv, const char *prefix); +int cmd_annotate(int argc, const char **argv, const char *prefix); +int cmd_bench(int argc, const char **argv, const char *prefix); +int cmd_buildid_cache(int argc, const char **argv, const char *prefix); +int cmd_buildid_list(int argc, const char **argv, const char *prefix); +int cmd_config(int argc, const char **argv, const char *prefix); +int cmd_diff(int argc, const char **argv, const char *prefix); +int cmd_evlist(int argc, const char **argv, const char *prefix); +int cmd_help(int argc, const char **argv, const char *prefix); +int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_list(int argc, const char **argv, const char *prefix); +int 
cmd_record(int argc, const char **argv, const char *prefix); +int cmd_report(int argc, const char **argv, const char *prefix); +int cmd_stat(int argc, const char **argv, const char *prefix); +int cmd_timechart(int argc, const char **argv, const char *prefix); +int cmd_top(int argc, const char **argv, const char *prefix); +int cmd_script(int argc, const char **argv, const char *prefix); +int cmd_version(int argc, const char **argv, const char *prefix); +int cmd_probe(int argc, const char **argv, const char *prefix); +int cmd_kmem(int argc, const char **argv, const char *prefix); +int cmd_lock(int argc, const char **argv, const char *prefix); +int cmd_kvm(int argc, const char **argv, const char *prefix); +int cmd_test(int argc, const char **argv, const char *prefix); +int cmd_trace(int argc, const char **argv, const char *prefix); +int cmd_inject(int argc, const char **argv, const char *prefix); +int cmd_mem(int argc, const char **argv, const char *prefix); +int cmd_data(int argc, const char **argv, const char *prefix); -extern int find_scripts(char **scripts_array, char **scripts_path_array); +int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 511141b102e8..f7d7f5a1cad5 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -61,50 +61,45 @@ endif ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 -else - # - # For linking with debug library, run like: - # - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ - # - ifdef LIBUNWIND_DIR - LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib - endif - LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) - - # Set per-feature check compilation flags - FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) - FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) 
endif +# +# For linking with debug library, run like: +# +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +# +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib +endif +LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) + +# Set per-feature check compilation flags +FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif -ifndef NO_LIBELF - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) - FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw +# for linking with debug library, run like: +# make DEBUG=1 LIBDW_DIR=/opt/libdw/ +ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw -ifdef LIBBABELTRACE - # for linking with debug library, run like: - # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ - ifdef LIBBABELTRACE_DIR - LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include - LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) - FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +# for linking with debug library, run like: +# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ +ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) 
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi # include ARCH specific config @@ -114,7 +109,7 @@ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET endif -include $(src-perf)/config/utilities.mak +include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) dummy := $(error Error: $(FLEX) is missing on this system, please install it) @@ -145,28 +140,26 @@ ifdef PARSER_DEBUG $(call detected_var,PARSER_DEBUG_FLEX) endif -ifndef NO_LIBPYTHON - # Try different combinations to accommodate systems that only have - # python[2][-config] in weird combinations but always preferring - # python2 and python2-config as per pep-0394. If we catch a - # python[-config] in version 3, the version check will kill it. - PYTHON2 := $(if $(call get-executable,python2),python2,python) - override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) - PYTHON2_CONFIG := \ - $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) - override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) +# Try different combinations to accommodate systems that only have +# python[2][-config] in weird combinations but always preferring +# python2 and python2-config as per pep-0394. If we catch a +# python[-config] in version 3, the version check will kill it. 
+PYTHON2 := $(if $(call get-executable,python2),python2,python) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) +PYTHON2_CONFIG := \ + $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) +override PYTHON_CONFIG := \ + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) +PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) - FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) -endif +FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 @@ -335,6 +328,13 @@ ifndef NO_LIBELF endif # NO_LIBBPF endif # NO_LIBELF +ifdef PERF_HAVE_JITDUMP + ifndef NO_DWARF + $(call detected,CONFIG_JITDUMP) + CFLAGS += -DHAVE_JITDUMP + endif +endif + ifeq ($(ARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX @@ -411,6 +411,17 @@ ifndef NO_LIBAUDIT endif endif +ifndef NO_LIBCRYPTO + ifneq ($(feature-libcrypto), 1) + msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); + NO_LIBCRYPTO := 1 + else + CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT + EXTLIBS += -lcrypto + $(call detected,CONFIG_CRYPTO) + endif +endif + ifdef NO_NEWT NO_SLANG=1 endif diff --git 
a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile new file mode 100644 index 000000000000..5ce61a1bda9c --- /dev/null +++ b/tools/perf/jvmti/Makefile @@ -0,0 +1,89 @@ +ARCH=$(shell uname -m) + +ifeq ($(ARCH), x86_64) +JARCH=amd64 +endif +ifeq ($(ARCH), armv7l) +JARCH=armhf +endif +ifeq ($(ARCH), armv6l) +JARCH=armhf +endif +ifeq ($(ARCH), aarch64) +JARCH=aarch64 +endif +ifeq ($(ARCH), ppc64) +JARCH=powerpc +endif +ifeq ($(ARCH), ppc64le) +JARCH=powerpc +endif + +DESTDIR=/usr/local + +VERSION=1 +REVISION=0 +AGE=0 + +LN=ln -sf +RM=rm + +SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) +VLIBJVMTI=libjvmti.so.$(VERSION) +SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) +SOLIBEXT=so + +# The following works at least on fedora 23, you may need the next +# line for other distros. +ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) +JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif +endif +ifndef JDIR +$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) +else + ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) + $(error the openjdk development package appears to me missing, install and try again) + endif +endif +$(info Using Java from $(JDIR)) +# -lrt required in 32-bit mode for clock_gettime() +LIBS=-lelf -lrt +INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux + +TARGETS=$(SLIBJVMTI) + +SRCS=libjvmti.c jvmti_agent.c +OBJS=$(SRCS:.c=.o) +SOBJS=$(OBJS:.o=.lo) +OPT=-O2 -g -Werror -Wall + +CFLAGS=$(INCDIR) $(OPT) + +all: $(TARGETS) + +.c.o: + $(CC) $(CFLAGS) -c $*.c +.c.lo: + $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo + +$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h + +$(SLIBJVMTI): $(SOBJS) + $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) + $(LN) $@ libjvmti.$(SOLIBEXT) + +clean: + $(RM) -f *.o *.so.* *.so 
*.lo + +install: + -mkdir -p $(DESTDIR)/lib + install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) + ldconfig + +.SUFFIXES: .c .S .o .lo diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c new file mode 100644 index 000000000000..6461e02ab940 --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.c @@ -0,0 +1,465 @@ +/* + * jvmti_agent.c: JVMTI agent interface + * + * Adapted from the Oprofile code in opagent.c: + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#include <sys/types.h> +#include <sys/stat.h> /* for mkdir() */ +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <limits.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <sys/mman.h> +#include <syscall.h> /* for gettid() */ +#include <err.h> + +#include "jvmti_agent.h" +#include "../util/jitdump.h" + +#define JIT_LANG "java" + +static char jit_path[PATH_MAX]; +static void *marker_addr; + +/* + * padding buffer + */ +static const char pad_bytes[7]; + +static inline pid_t gettid(void) +{ + return (pid_t)syscall(__NR_gettid); +} + +static int get_e_machine(struct jitheader *hdr) +{ + ssize_t sret; + char id[16]; + int fd, ret = -1; + int m = -1; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, id, sizeof(id)); + if (sret != sizeof(id)) + goto error; + + /* check ELF signature */ + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') + goto error; + + sret = read(fd, &info, sizeof(info)); + if (sret != sizeof(info)) + goto error; + + m = info.e_machine; + if (m < 0) + m = 0; /* ELF EM_NONE */ + + hdr->elf_mach = m; + ret = 0; +error: + close(fd); + return ret; +} + +#define NSEC_PER_SEC 1000000000 +static int perf_clk_id = CLOCK_MONOTONIC; + +static inline uint64_t +timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +static inline uint64_t +perf_get_timestamp(void) +{ + struct timespec ts; + int ret; + + ret = clock_gettime(perf_clk_id, &ts); + if (ret) + return 0; + + return 
timespec_to_ns(&ts); +} + +static int +debug_cache_init(void) +{ + char str[32]; + char *base, *p; + struct tm tm; + time_t t; + int ret; + + time(&t); + localtime_r(&t, &tm); + + base = getenv("JITDUMPDIR"); + if (!base) + base = getenv("HOME"); + if (!base) + base = "."; + + strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); + + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("jvmti: cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); + + p = mkdtemp(jit_path); + if (p != jit_path) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + + return 0; +} + +static int +perf_open_marker_file(int fd) +{ + long pgsz; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return -1; + + /* + * we mmap the jitdump to create an MMAP RECORD in perf.data file. + * The mmap is captured either live (perf record running when we mmap) + * or in deferred mode, via /proc/PID/maps + * the MMAP record is used as a marker of a jitdump file for more meta + * data info about the jitted code. Perf report/annotate detect this + * special filename and process the jitdump file. + * + * mapping must be PROT_EXEC to ensure it is captured by perf record + * even when not using -d option + */ + marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0); + return (marker_addr == MAP_FAILED) ? 
-1 : 0; +} + +static void +perf_close_marker_file(void) +{ + long pgsz; + + if (!marker_addr) + return; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return; + + munmap(marker_addr, pgsz); +} + +void *jvmti_open(void) +{ + int pad_cnt; + char dump_path[PATH_MAX]; + struct jitheader header; + int fd; + FILE *fp; + + /* + * check if clockid is supported + */ + if (!perf_get_timestamp()) + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + + memset(&header, 0, sizeof(header)); + + debug_cache_init(); + + /* + * jitdump file name + */ + snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + + fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + return NULL; + + /* + * create perf.data maker for the jitdump file + */ + if (perf_open_marker_file(fd)) { + warnx("jvmti: failed to create marker file"); + return NULL; + } + + fp = fdopen(fd, "w+"); + if (!fp) { + warn("jvmti: cannot create %s", dump_path); + close(fd); + goto error; + } + + warnx("jvmti: jitdump in %s", dump_path); + + if (get_e_machine(&header)) { + warn("get_e_machine failed\n"); + goto error; + } + + header.magic = JITHEADER_MAGIC; + header.version = JITHEADER_VERSION; + header.total_size = sizeof(header); + header.pid = getpid(); + + /* calculate amount of padding '\0' */ + pad_cnt = PADDING_8ALIGNED(header.total_size); + header.total_size += pad_cnt; + + header.timestamp = perf_get_timestamp(); + + if (!fwrite(&header, sizeof(header), 1, fp)) { + warn("jvmti: cannot write dumpfile header"); + goto error; + } + + /* write padding '\0' if necessary */ + if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) { + warn("jvmti: cannot write dumpfile header padding"); + goto error; + } + + return fp; +error: + fclose(fp); + return NULL; +} + +int +jvmti_close(void *agent) +{ + struct jr_code_close rec; + FILE *fp = agent; + + if (!fp) { + warnx("jvmti: incalid fd in close_agent"); + return -1; + } + + rec.p.id = JIT_CODE_CLOSE; + rec.p.total_size = 
sizeof(rec); + + rec.p.timestamp = perf_get_timestamp(); + + if (!fwrite(&rec, sizeof(rec), 1, fp)) + return -1; + + fclose(fp); + + fp = NULL; + + perf_close_marker_file(); + + return 0; +} + +int +jvmti_write_code(void *agent, char const *sym, + uint64_t vma, void const *code, unsigned int const size) +{ + static int code_generation = 1; + struct jr_code_load rec; + size_t sym_len; + size_t padding_count; + FILE *fp = agent; + int ret = -1; + + /* don't care about 0 length function, no samples */ + if (size == 0) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_native_code"); + return -1; + } + + sym_len = strlen(sym) + 1; + + rec.p.id = JIT_CODE_LOAD; + rec.p.total_size = sizeof(rec) + sym_len; + padding_count = PADDING_8ALIGNED(rec.p.total_size); + rec.p. total_size += padding_count; + rec.p.timestamp = perf_get_timestamp(); + + rec.code_size = size; + rec.vma = vma; + rec.code_addr = vma; + rec.pid = getpid(); + rec.tid = gettid(); + + if (code) + rec.p.total_size += size; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + /* + * get code index inside lock to avoid race condition + */ + rec.code_index = code_generation++; + + ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + fwrite_unlocked(sym, sym_len, 1, fp); + + if (padding_count) + fwrite_unlocked(pad_bytes, padding_count, 1, fp); + + if (code) + fwrite_unlocked(code, size, 1, fp); + + funlockfile(fp); + + ret = 0; + + return ret; +} + +int +jvmti_write_debug_info(void *agent, uint64_t code, const char *file, + jvmti_line_info_t *li, int nr_lines) +{ + struct jr_code_debug_info rec; + size_t sret, len, size, flen; + size_t padding_count; + uint64_t addr; + const char *fn = file; + FILE *fp = agent; + int i; + + /* + * no entry to write + */ + if (!nr_lines) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_debug_info"); + return -1; + } + + flen = strlen(file) + 1; + + rec.p.id = 
JIT_CODE_DEBUG_INFO; + size = sizeof(rec); + rec.p.timestamp = perf_get_timestamp(); + rec.code_addr = (uint64_t)(uintptr_t)code; + rec.nr_entry = nr_lines; + + /* + * on disk source line info layout: + * uint64_t : addr + * int : line number + * int : column discriminator + * file[] : source file name + * padding : pad to multiple of 8 bytes + */ + size += nr_lines * sizeof(struct debug_entry); + size += flen * nr_lines; + /* + * pad to 8 bytes + */ + padding_count = PADDING_8ALIGNED(size); + + rec.p.total_size = size + padding_count; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + if (sret != 1) + goto error; + + for (i = 0; i < nr_lines; i++) { + + addr = (uint64_t)li[i].pc; + len = sizeof(addr); + sret = fwrite_unlocked(&addr, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].line_number); + sret = fwrite_unlocked(&li[i].line_number, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].discrim); + sret = fwrite_unlocked(&li[i].discrim, len, 1, fp); + if (sret != 1) + goto error; + + sret = fwrite_unlocked(fn, flen, 1, fp); + if (sret != 1) + goto error; + } + if (padding_count) + sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); + if (sret != 1) + goto error; + + funlockfile(fp); + return 0; +error: + funlockfile(fp); + return -1; +} diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h new file mode 100644 index 000000000000..bedf5d0ba9ff --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.h @@ -0,0 +1,36 @@ +#ifndef __JVMTI_AGENT_H__ +#define __JVMTI_AGENT_H__ + +#include <sys/types.h> +#include <stdint.h> +#include <jvmti.h> + +#define __unused __attribute__((unused)) + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct { + unsigned long pc; + int line_number; + int discrim; /* discriminator -- 0 for now */ +} jvmti_line_info_t; + +void 
*jvmti_open(void); +int jvmti_close(void *agent); +int jvmti_write_code(void *agent, char const *symbol_name, + uint64_t vma, void const *code, + const unsigned int code_size); + +int jvmti_write_debug_info(void *agent, + uint64_t code, + const char *file, + jvmti_line_info_t *li, + int nr_lines); + +#if defined(__cplusplus) +} + +#endif +#endif /* __JVMTI_H__ */ diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c new file mode 100644 index 000000000000..ac12e4b91a92 --- /dev/null +++ b/tools/perf/jvmti/libjvmti.c @@ -0,0 +1,304 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <err.h> +#include <jvmti.h> +#include <jvmticmlr.h> +#include <limits.h> + +#include "jvmti_agent.h" + +static int has_line_numbers; +void *jvmti_agent; + +static jvmtiError +do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, + jvmti_line_info_t *tab, jint *nr) +{ + jint i, lines = 0; + jint nr_lines = 0; + jvmtiLineNumberEntry *loc_tab = NULL; + jvmtiError ret; + + ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); + if (ret != JVMTI_ERROR_NONE) + return ret; + + for (i = 0; i < nr_lines; i++) { + if (loc_tab[i].start_location < bci) { + tab[lines].pc = (unsigned long)pc; + tab[lines].line_number = loc_tab[i].line_number; + tab[lines].discrim = 0; /* not yet used */ + lines++; + } else { + break; + } + } + (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); + *nr = lines; + return JVMTI_ERROR_NONE; +} + +static jvmtiError +get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines) +{ + const jvmtiCompiledMethodLoadRecordHeader *hdr; + jvmtiCompiledMethodLoadInlineRecord *rec; + jvmtiLineNumberEntry *lne = NULL; + PCStackInfo *c; + jint nr, ret; + int nr_total = 0; + int i, lines_total = 0; + + if (!(tab && nr_lines)) + return JVMTI_ERROR_NULL_POINTER; + + /* + * Phase 1 -- get the number of lines necessary + */ + for (hdr = compile_info; hdr != 
NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + /* + * unfortunately, need a tab to get the number of lines! + */ + ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); + if (ret == JVMTI_ERROR_NONE) { + /* free what was allocated for nothing */ + (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); + nr_total += (int)nr; + } + } + } + } + + if (nr_total == 0) + return JVMTI_ERROR_NOT_FOUND; + + /* + * Phase 2 -- allocate big enough line table + */ + *tab = malloc(nr_total * sizeof(**tab)); + if (!*tab) + return JVMTI_ERROR_OUT_OF_MEMORY; + + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + ret = do_get_line_numbers(jvmti, c->pc, + c->methods[0], + c->bcis[0], + *tab + lines_total, + &nr); + if (ret == JVMTI_ERROR_NONE) + lines_total += nr; + } + } + } + *nr_lines = lines_total; + return JVMTI_ERROR_NONE; +} + +static void JNICALL +compiled_method_load_cb(jvmtiEnv *jvmti, + jmethodID method, + jint code_size, + void const *code_addr, + jint map_length, + jvmtiAddrLocationMap const *map, + const void *compile_info) +{ + jvmti_line_info_t *line_tab = NULL; + jclass decl_class; + char *class_sign = NULL; + char *func_name = NULL; + char *func_sign = NULL; + char *file_name= NULL; + char fn[PATH_MAX]; + uint64_t addr = (uint64_t)(uintptr_t)code_addr; + jvmtiError ret; + int nr_lines = 0; /* in line_tab[] */ + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, + &decl_class); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get declaring class"); + return; + } + + if (has_line_numbers && map && map_length) { + ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); + if (ret != JVMTI_ERROR_NONE) { + 
warnx("jvmti: cannot get line table for method"); + nr_lines = 0; + } + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get source filename ret=%d", ret); + goto error; + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, + &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: getclassignature failed"); + goto error; + } + + ret = (*jvmti)->GetMethodName(jvmti, method, &func_name, + &func_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: failed getmethodname"); + goto error; + } + + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + fn[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) + fn[i] = file_name[j]; + fn[i] = '\0'; + } else { + /* fallback case */ + strcpy(fn, file_name); + } + /* + * write source line info record if we have it + */ + if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) + warnx("jvmti: write_debug_info() failed"); + + len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; + { + char str[len]; + snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign); + + if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size)) + warnx("jvmti: write_code() failed"); + } +error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + free(line_tab); +} + +static void JNICALL 
+code_generated_cb(jvmtiEnv *jvmti, + char const *name, + void const *code_addr, + jint code_size) +{ + uint64_t addr = (uint64_t)(unsigned long)code_addr; + int ret; + + ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size); + if (ret) + warnx("jvmti: write_code() failed for code_generated"); +} + +JNIEXPORT jint JNICALL +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +{ + jvmtiEventCallbacks cb; + jvmtiCapabilities caps1; + jvmtiJlocationFormat format; + jvmtiEnv *jvmti = NULL; + jint ret; + + jvmti_agent = jvmti_open(); + if (!jvmti_agent) { + warnx("jvmti: open_agent failed"); + return -1; + } + + /* + * Request a JVMTI interface version 1 environment + */ + ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1); + if (ret != JNI_OK) { + warnx("jvmti: jvmti version 1 not supported"); + return -1; + } + + /* + * acquire method_load capability, we require it + * request line numbers (optional) + */ + memset(&caps1, 0, sizeof(caps1)); + caps1.can_generate_compiled_method_load_events = 1; + + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: acquire compiled_method capability failed"); + return -1; + } + ret = (*jvmti)->GetJLocationFormat(jvmti, &format); + if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) { + memset(&caps1, 0, sizeof(caps1)); + caps1.can_get_line_numbers = 1; + caps1.can_get_source_file_name = 1; + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret == JVMTI_ERROR_NONE) + has_line_numbers = 1; + } + + memset(&cb, 0, sizeof(cb)); + + cb.CompiledMethodLoad = compiled_method_load_cb; + cb.DynamicCodeGenerated = code_generated_cb; + + ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb)); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot set event callbacks"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: 
setnotification failed for method_load"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed on code_generated"); + return -1; + } + return 0; +} + +JNIEXPORT void JNICALL +Agent_OnUnload(JavaVM *jvm __unused) +{ + int ret; + + ret = jvmti_close(jvmti_agent); + if (ret) + errx(1, "Error: op_close_agent()"); +} diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a929618b8eb6..aaee0a782747 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -454,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv) static void execv_dashed_external(const char **argv) { - struct strbuf cmd = STRBUF_INIT; + char *cmd; const char *tmp; int status; - strbuf_addf(&cmd, "perf-%s", argv[0]); + if (asprintf(&cmd, "perf-%s", argv[0]) < 0) + goto do_die; /* * argv[0] must be the perf command, but the argv array @@ -467,7 +468,7 @@ static void execv_dashed_external(const char **argv) * restore it on error. 
*/ tmp = argv[0]; - argv[0] = cmd.buf; + argv[0] = cmd; /* * if we fail because the command is not found, it is @@ -475,15 +476,16 @@ static void execv_dashed_external(const char **argv) */ status = run_command_v_opt(argv, 0); if (status != -ERR_RUN_COMMAND_EXEC) { - if (IS_RUN_COMMAND_ERR(status)) + if (IS_RUN_COMMAND_ERR(status)) { +do_die: die("unable to run '%s'", argv[0]); + } exit(-status); } errno = ENOENT; /* as if we called execvp */ argv[0] = tmp; - - strbuf_release(&cmd); + zfree(&cmd); } static int run_argv(int *argcp, const char ***argv) @@ -546,6 +548,8 @@ int main(int argc, const char **argv) srandom(time(NULL)); + perf_config(perf_default_config, NULL); + /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); @@ -613,6 +617,8 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); + perf_debug_setup(); + while (1) { static int done_help; int was_alias = run_argv(&argc, &argv); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 90129accffbe..5381a01c0610 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -58,6 +58,8 @@ struct record_opts { bool full_auxtrace; bool auxtrace_snapshot_mode; bool record_switch_events; + bool all_kernel; + bool all_user; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 15c8400240fd..1d95009592eb 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -71,7 +71,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names" + print "Install the audit-libs-python package to get syscall names.\n" \ + "For example:\n # apt-get install python-audit (Ubuntu)" \ + "\n # yum install audit-libs-python (Fedora)" \ + "\n etc.\n" def 
syscall_name(id): try: diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore index bf016c439fbd..8cc30e731c73 100644 --- a/tools/perf/tests/.gitignore +++ b/tools/perf/tests/.gitignore @@ -1,3 +1,4 @@ llvm-src-base.c llvm-src-kbuild.c llvm-src-prologue.c +llvm-src-relocation.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 614899b88b37..1ba628ed049a 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,7 +31,7 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o -perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o +perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o perf-y += bpf.o perf-y += topology.o perf-y += cpumap.o @@ -59,6 +59,13 @@ $(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ +$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index fb80c9eb6a95..e7664fe3bd33 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -29,14 +29,59 @@ static int fd1; static int fd2; +static int fd3; static int overflows; +static int overflows_2; + +volatile long the_var; + + +/* + * Use ASM to ensure watchpoint and breakpoint can be triggered + * at one instruction. 
+ */ +#if defined (__x86_64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "incq (%rdi)\n" + "ret\n"); +#elif defined (__aarch64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "str x30, [x0]\n" + "ret\n"); + +#else +static void __test_function(volatile long *ptr) +{ + *ptr = 0x1234; +} +#endif __attribute__ ((noinline)) static int test_function(void) { + __test_function(&the_var); + the_var++; return time(NULL); } +static void sig_handler_2(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + overflows_2++; + if (overflows_2 > 10) { + ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); + } +} + static void sig_handler(int signum __maybe_unused, siginfo_t *oh __maybe_unused, void *uc __maybe_unused) @@ -54,10 +99,11 @@ static void sig_handler(int signum __maybe_unused, */ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); } } -static int bp_event(void *fn, int setup_signal) +static int __event(bool is_x, void *addr, int sig) { struct perf_event_attr pe; int fd; @@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal) pe.size = sizeof(struct perf_event_attr); pe.config = 0; - pe.bp_type = HW_BREAKPOINT_X; - pe.bp_addr = (unsigned long) fn; + pe.bp_type = is_x ? 
HW_BREAKPOINT_X : HW_BREAKPOINT_W; + pe.bp_addr = (unsigned long) addr; pe.bp_len = sizeof(long); pe.sample_period = 1; @@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal) return TEST_FAIL; } - if (setup_signal) { - fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); - fcntl(fd, F_SETSIG, SIGIO); - fcntl(fd, F_SETOWN, getpid()); - } + fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); + fcntl(fd, F_SETSIG, sig); + fcntl(fd, F_SETOWN, getpid()); ioctl(fd, PERF_EVENT_IOC_RESET, 0); return fd; } +static int bp_event(void *addr, int sig) +{ + return __event(true, addr, sig); +} + +static int wp_event(void *addr, int sig) +{ + return __event(false, addr, sig); +} + static long long bp_count(int fd) { long long count; @@ -114,7 +168,7 @@ static long long bp_count(int fd) int test__bp_signal(int subtest __maybe_unused) { struct sigaction sa; - long long count1, count2; + long long count1, count2, count3; /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); @@ -126,21 +180,52 @@ int test__bp_signal(int subtest __maybe_unused) return TEST_FAIL; } + sa.sa_sigaction = (void *) sig_handler_2; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + pr_debug("failed setting up signal handler 2\n"); + return TEST_FAIL; + } + /* * We create following events: * - * fd1 - breakpoint event on test_function with SIGIO + * fd1 - breakpoint event on __test_function with SIGIO * signal configured. We should get signal * notification each time the breakpoint is hit * - * fd2 - breakpoint event on sig_handler without SIGIO + * fd2 - breakpoint event on sig_handler with SIGUSR1 + * configured. We should get SIGUSR1 each time when + * breakpoint is hit + * + * fd3 - watchpoint event on __test_function with SIGIO * configured. 
* * Following processing should happen: - * - execute test_function - * - fd1 event breakpoint hit -> count1 == 1 - * - SIGIO is delivered -> overflows == 1 - * - fd2 event breakpoint hit -> count2 == 1 + * Exec: Action: Result: + * incq (%rdi) - fd1 event breakpoint hit -> count1 == 1 + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 1 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 1 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 1 + * sys_rt_sigreturn - return from sig_handler + * incq (%rdi) - fd3 event watchpoint hit -> count3 == 1 (wp and bp in one insn) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 2 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 2 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 2 + * sys_rt_sigreturn - return from sig_handler + * the_var++ - fd3 event watchpoint hit -> count3 == 2 (standalone watchpoint) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 3 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 3 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows == 3 + * sys_rt_sigreturn - return from sig_handler * * The test case check following error conditions: * - we get stuck in signal handler because of debug @@ -152,11 +237,13 @@ int test__bp_signal(int subtest __maybe_unused) * */ - fd1 = bp_event(test_function, 1); - fd2 = bp_event(sig_handler, 0); + fd1 = bp_event(__test_function, SIGIO); + fd2 = bp_event(sig_handler, SIGUSR1); + fd3 = wp_event((void *)&the_var, SIGIO); ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* * Kick off the test by trigering 'fd1' @@ -166,15 +253,18 @@ int test__bp_signal(int subtest __maybe_unused) ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, 
PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); count1 = bp_count(fd1); count2 = bp_count(fd2); + count3 = bp_count(fd3); close(fd1); close(fd2); + close(fd3); - pr_debug("count1 %lld, count2 %lld, overflow %d\n", - count1, count2, overflows); + pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n", + count1, count2, count3, overflows, overflows_2); if (count1 != 1) { if (count1 == 11) @@ -183,12 +273,18 @@ int test__bp_signal(int subtest __maybe_unused) pr_debug("failed: wrong count for bp1%lld\n", count1); } - if (overflows != 1) + if (overflows != 3) pr_debug("failed: wrong overflow hit\n"); - if (count2 != 1) + if (overflows_2 != 3) + pr_debug("failed: wrong overflow_2 hit\n"); + + if (count2 != 3) pr_debug("failed: wrong count for bp2\n"); - return count1 == 1 && overflows == 1 && count2 == 1 ? + if (count3 != 2) + pr_debug("failed: wrong count for bp3\n"); + + return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? 
TEST_OK : TEST_FAIL; } diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c new file mode 100644 index 000000000000..93af77421816 --- /dev/null +++ b/tools/perf/tests/bpf-script-test-relocation.c @@ -0,0 +1,50 @@ +/* + * bpf-script-test-relocation.c + * Test BPF loader checking relocation + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define BPF_ANY 0 +#define BPF_MAP_TYPE_ARRAY 2 +#define BPF_FUNC_map_lookup_elem 1 +#define BPF_FUNC_map_update_elem 2 + +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = + (void *) BPF_FUNC_map_update_elem; + +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) +struct bpf_map_def SEC("maps") my_table = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +int this_is_a_global_val; + +SEC("func=sys_write") +int bpf_func__sys_write(void *ctx) +{ + int key = 0; + int value = 0; + + /* + * Incorrect relocation. Should not allow this program be + * loaded into kernel. 
+ */ + bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0); + return 0; +} +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 33689a0cf821..199501c71e27 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,7 +1,11 @@ #include <stdio.h> #include <sys/epoll.h> +#include <util/util.h> #include <util/bpf-loader.h> #include <util/evlist.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <bpf/bpf.h> #include "tests.h" #include "llvm.h" #include "debug.h" @@ -71,6 +75,15 @@ static struct { (NR_ITERS + 1) / 4, }, #endif + { + LLVM_TESTCASE_BPF_RELOCATION, + "Test BPF relocation checker", + "[bpf_relocation_test]", + "fix 'perf test LLVM' first", + "libbpf error when dealing with relocation", + NULL, + 0, + }, }; static int do_test(struct bpf_object *obj, int (*func)(void), @@ -99,7 +112,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), parse_evlist.error = &parse_error; INIT_LIST_HEAD(&parse_evlist.list); - err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj); + err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL); if (err || list_empty(&parse_evlist.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; @@ -190,7 +203,7 @@ static int __test__bpf(int idx) ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, bpf_testcase_table[idx].prog_id, - true); + true, NULL); if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { pr_debug("Unable to get BPF object, %s\n", bpf_testcase_table[idx].msg_compile_fail); @@ -202,14 +215,21 @@ static int __test__bpf(int idx) obj = prepare_bpf(obj_buf, obj_buf_sz, bpf_testcase_table[idx].name); - if (!obj) { + if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) { + if (!obj) + pr_debug("Fail to load BPF object: %s\n", + bpf_testcase_table[idx].msg_load_fail); + else + pr_debug("Success unexpectedly: %s\n", + 
bpf_testcase_table[idx].msg_load_fail); ret = TEST_FAIL; goto out; } - ret = do_test(obj, - bpf_testcase_table[idx].target_func, - bpf_testcase_table[idx].expect_result); + if (obj) + ret = do_test(obj, + bpf_testcase_table[idx].target_func, + bpf_testcase_table[idx].expect_result); out: bpf__clear(); return ret; @@ -227,6 +247,36 @@ const char *test__bpf_subtest_get_desc(int i) return bpf_testcase_table[i].desc; } +static int check_env(void) +{ + int err; + unsigned int kver_int; + char license[] = "GPL"; + + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + err = fetch_kernel_version(&kver_int, NULL, 0); + if (err) { + pr_debug("Unable to get kernel version\n"); + return err; + } + + err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, + sizeof(insns) / sizeof(insns[0]), + license, kver_int, NULL, 0); + if (err < 0) { + pr_err("Missing basic BPF support, skip this test: %s\n", + strerror(errno)); + return err; + } + close(err); + + return 0; +} + int test__bpf(int i) { int err; @@ -239,6 +289,9 @@ int test__bpf(int i) return TEST_SKIP; } + if (check_env()) + return TEST_SKIP; + err = __test__bpf(i); return err; } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 313a48c6b2bc..abd3f0ec0c0b 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -293,7 +293,6 @@ static int process_sample_event(struct machine *machine, { struct perf_sample sample; struct thread *thread; - u8 cpumode; int ret; if (perf_evlist__parse_sample(evlist, event, &sample)) { @@ -307,9 +306,7 @@ static int process_sample_event(struct machine *machine, return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - ret = read_object_code(sample.ip, READLEN, cpumode, thread, state); + ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state); thread__put(thread); return ret; } @@ -439,7 +436,7 @@ static int do_test_code_reading(bool try_kcore) .mmap_pages = 
UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, - .freq = 4000, + .freq = 500, .target = { .uses_mmap = true, }, @@ -559,7 +556,13 @@ static int do_test_code_reading(bool try_kcore) evlist = NULL; continue; } - pr_debug("perf_evlist__open failed\n"); + + if (verbose) { + char errbuf[512]; + perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); + } + goto out_put; } break; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 1c5c0221cea2..8f6eb853aaf7 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -20,10 +20,10 @@ static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_mmap2_event(machine, event, NULL); + return machine__process_mmap2_event(machine, event, sample); } static int init_live_machine(struct machine *machine) diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 071a8b5f5232..f55f4bd47932 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -100,9 +100,11 @@ struct machine *setup_fake_machine(struct machines *machines) } for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { + struct perf_sample sample = { + .cpumode = PERF_RECORD_MISC_USER, + }; union perf_event fake_mmap_event = { .mmap = { - .header = { .misc = PERF_RECORD_MISC_USER, }, .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, @@ -114,7 +116,7 @@ struct machine *setup_fake_machine(struct machines *machines) strcpy(fake_mmap_event.mmap.filename, fake_mmap_info[i].filename); - machine__process_mmap_event(machine, &fake_mmap_event, NULL); + machine__process_mmap_event(machine, &fake_mmap_event, &sample); } for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { diff --git 
a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 5e6a86e50fb9..ed5aa9eaeb6c 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -81,11 +81,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -97,13 +92,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) else iter.ops = &hist_iter_normal; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; sample.callchain = (struct ip_callchain *)fake_callchains[i]; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, @@ -191,7 +186,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. 
*/ hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(hists_to_evsel(hists), NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 351a42463444..b825d24f8186 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -58,11 +58,6 @@ static int add_hist_entries(struct perf_evlist *evlist, */ evlist__for_each(evlist, evsel) { for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -76,12 +71,12 @@ static int add_hist_entries(struct perf_evlist *evlist, hists->dso_filter = NULL; hists->symbol_filter_str = NULL; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; al.socket = fake_samples[i].socket; @@ -145,7 +140,7 @@ int test__hists_filter(int subtest __maybe_unused) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 64b257d8d557..358324e47805 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -76,17 +76,12 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct hists *hists = evsel__hists(evsel); for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = 
fake_common_samples[k].pid; sample.tid = fake_common_samples[k].pid; sample.ip = fake_common_samples[k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, @@ -102,17 +97,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) } for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - sample.pid = fake_samples[i][k].pid; sample.tid = fake_samples[i][k].pid; sample.ip = fake_samples[i][k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index b231265148d8..d3556fbe8c5c 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -51,11 +51,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -63,13 +58,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .hide_unresolved = false, }; + sample.cpumode = PERF_RECORD_MISC_USER; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, @@ -156,7 +151,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + 
perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -256,7 +251,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -310,7 +305,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -388,7 +383,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -491,7 +486,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 06f45c1d4256..cff564fb4b66 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -6,12 +6,6 @@ #include "tests.h" #include "debug.h" -static int perf_config_cb(const char *var, const char *val, - void *arg __maybe_unused) -{ - return perf_default_config(var, val, arg); -} - #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { @@ -35,6 +29,7 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused, static struct { const char *source; const char *desc; + bool should_load_fail; } bpf_source_table[__LLVM_TESTCASE_MAX] = { [LLVM_TESTCASE_BASE] = { .source = 
test_llvm__bpf_base_prog, @@ -48,14 +43,19 @@ static struct { .source = test_llvm__bpf_test_prologue_prog, .desc = "Compile source for BPF prologue generation test", }, + [LLVM_TESTCASE_BPF_RELOCATION] = { + .source = test_llvm__bpf_test_relocation, + .desc = "Compile source for BPF relocation test", + .should_load_fail = true, + }, }; - int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, enum test_llvm__testcase idx, - bool force) + bool force, + bool *should_load_fail) { const char *source; const char *desc; @@ -68,8 +68,8 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, source = bpf_source_table[idx].source; desc = bpf_source_table[idx].desc; - - perf_config(perf_config_cb, NULL); + if (should_load_fail) + *should_load_fail = bpf_source_table[idx].should_load_fail; /* * Skip this test if user's .perfconfig doesn't set [llvm] section @@ -136,14 +136,15 @@ int test__llvm(int subtest) int ret; void *obj_buf = NULL; size_t obj_buf_sz = 0; + bool should_load_fail = false; if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) return TEST_FAIL; ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, - subtest, false); + subtest, false, &should_load_fail); - if (ret == TEST_OK) { + if (ret == TEST_OK && !should_load_fail) { ret = test__bpf_parsing(obj_buf, obj_buf_sz); if (ret != TEST_OK) { pr_debug("Failed to parse test case '%s'\n", diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h index 5150b4d6ef50..0eaa604be99d 100644 --- a/tools/perf/tests/llvm.h +++ b/tools/perf/tests/llvm.h @@ -7,14 +7,17 @@ extern const char test_llvm__bpf_base_prog[]; extern const char test_llvm__bpf_test_kbuild_prog[]; extern const char test_llvm__bpf_test_prologue_prog[]; +extern const char test_llvm__bpf_test_relocation[]; enum test_llvm__testcase { LLVM_TESTCASE_BASE, LLVM_TESTCASE_KBUILD, LLVM_TESTCASE_BPF_PROLOGUE, + LLVM_TESTCASE_BPF_RELOCATION, __LLVM_TESTCASE_MAX, }; int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, - enum 
test_llvm__testcase index, bool force); + enum test_llvm__testcase index, bool force, + bool *should_load_fail); #endif diff --git a/tools/perf/tests/make b/tools/perf/tests/make index f918015512af..cac15d93aea6 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -15,6 +15,7 @@ else PERF := . PERF_O := $(PERF) O_OPT := +FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O)) ifneq ($(O),) FULL_O := $(shell readlink -f $(O) || echo $(O)) @@ -79,6 +80,7 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_no_libcrypto := NO_LIBCRYPTO=1 make_tags := tags make_cscope := cscope make_help := help @@ -102,6 +104,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 +make_minimal += NO_LIBCRYPTO=1 # $(run) contains all available tests run := make_pure @@ -110,6 +113,9 @@ run := make_pure # disable features detection ifeq ($(MK),Makefile) run += make_clean_all +MAKE_F := $(MAKE) +else +MAKE_F := $(MAKE) -f $(MK) endif run += make_python_perf_so run += make_debug @@ -260,6 +266,8 @@ run := $(shell shuf -e $(run)) run_O := $(shell shuf -e $(run_O)) endif +max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L) + ifdef DEBUG d := $(info run $(run)) d := $(info run_O $(run_O)) @@ -267,13 +275,13 @@ endif MAKEFLAGS := --no-print-directory -clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null) +clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null) $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" 
$(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ @@ -283,8 +291,8 @@ $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ @@ -313,11 +321,43 @@ make_kernelsrc_tools: (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) +FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP +FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC + all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) out: $(run_O) @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) + +ifeq ($(REUSE_FEATURES_DUMP),1) +$(FEATURES_DUMP_FILE): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +$(FEATURES_DUMP_FILE_STATIC): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +# Add feature dump dependency for run/run_O targets +$(foreach t,$(run) $(run_O),$(eval \ + $(t): $(if $(findstring make_static,$(t)),\ + $(FEATURES_DUMP_FILE_STATIC),\ + $(FEATURES_DUMP_FILE)))) + +# Append 'FEATURES_DUMP=' option to all test cases. 
For example: +# make_no_libbpf: NO_LIBBPF=1 --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP +# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC +$(foreach t,$(run),$(if $(findstring make_static,$(t)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) +endif .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools endif # ifndef MK diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index abe8849d1d70..7865f68dc0d8 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1271,6 +1271,38 @@ static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) return 0; } +static int test__checkevent_config_symbol(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0); + return 0; +} + +static int test__checkevent_config_raw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0); + return 0; +} + +static int test__checkevent_config_num(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0); + return 0; +} + +static int test__checkevent_config_cache(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1579,6 +1611,26 @@ static struct evlist_test test__events[] = { .check = test__checkevent_precise_max_modifier, .id = 47, }, + { + .name = "instructions/name=insn/", + .check = test__checkevent_config_symbol, + .id = 48, + 
}, + { + .name = "r1234/name=rawpmu/", + .check = test__checkevent_config_raw, + .id = 49, + }, + { + .name = "4:0x6530160/name=numpmu/", + .check = test__checkevent_config_num, + .id = 50, + }, + { + .name = "L1-dcache-misses/name=cachepmu/", + .check = test__checkevent_config_cache, + .id = 51, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1666,7 +1718,7 @@ static int test_term(struct terms_test *t) } ret = t->check(&terms); - parse_events__free_terms(&terms); + parse_events_terms__purge(&terms); return ret; } diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg index 238aa3927c71..f2d9c5fe58e0 100755 --- a/tools/perf/tests/perf-targz-src-pkg +++ b/tools/perf/tests/perf-targz-src-pkg @@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d) tar xf ${TARBALL} -C $TMP_DEST rm -f ${TARBALL} cd - > /dev/null -make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1 +make -C $TMP_DEST/perf*/tools/perf > /dev/null RC=$? rm -rf ${TMP_DEST} exit $RC diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index f0bfc9e8fd9f..630b0b409b97 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) */ for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { struct symbol *pair, *first_pair; - bool backwards = true; sym = rb_entry(nd, struct symbol, rb_node); @@ -151,27 +150,14 @@ next_pair: continue; } else { - struct rb_node *nnd; -detour: - nnd = backwards ? 
rb_prev(&pair->rb_node) : - rb_next(&pair->rb_node); - if (nnd) { - struct symbol *next = rb_entry(nnd, struct symbol, rb_node); - - if (UM(next->start) == mem_start) { - pair = next; + pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); + if (pair) { + if (UM(pair->start) == mem_start) goto next_pair; - } - } - if (backwards) { - backwards = false; - pair = first_pair; - goto detour; + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, pair->name); } - - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", - mem_start, sym->name, pair->name); } } else pr_debug("%#" PRIx64 ": %s not on kallsyms\n", diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d37202121689..af68a9d488bf 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -531,8 +531,8 @@ static struct ui_browser_colorset { .bg = "yellow", }, { - .colorset = HE_COLORSET_CODE, - .name = "code", + .colorset = HE_COLORSET_JUMP_ARROWS, + .name = "jump_arrows", .fg = "blue", .bg = "default", }, diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 01781de59532..be3b70eb5fca 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -7,7 +7,7 @@ #define HE_COLORSET_MEDIUM 51 #define HE_COLORSET_NORMAL 52 #define HE_COLORSET_SELECTED 53 -#define HE_COLORSET_CODE 54 +#define HE_COLORSET_JUMP_ARROWS 54 #define HE_COLORSET_ADDR 55 #define HE_COLORSET_ROOT 56 diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 718bd46d47fa..4fc208e82c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - ui_browser__set_color(browser, HE_COLORSET_CODE); + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); } diff --git a/tools/perf/ui/browsers/hists.c 
b/tools/perf/ui/browsers/hists.c index 08c09ad755d2..2a83414159a6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -32,6 +32,7 @@ struct hist_browser { bool show_headers; float min_pcnt; u64 nr_non_filtered_entries; + u64 nr_hierarchy_entries; u64 nr_callchain_rows; }; @@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser) for (nd = rb_first(&hists->entries); (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { + nd = rb_hierarchy_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - if (he->unfolded) + if (he->leaf && he->unfolded) unfolded_rows += he->nr_rows; } return unfolded_rows; @@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; - if (hist_browser__has_filter(hb)) + if (symbol_conf.report_hierarchy) + nr_entries = hb->nr_hierarchy_entries; + else if (hist_browser__has_filter(hb)) nr_entries = hb->nr_non_filtered_entries; else nr_entries = hb->hists->nr_entries; @@ -247,6 +250,38 @@ static int callchain__count_rows(struct rb_root *chain) return n; } +static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, + bool include_children) +{ + int count = 0; + struct rb_node *node; + struct hist_entry *child; + + if (he->leaf) + return callchain__count_rows(&he->sorted_chain); + + if (he->has_no_entry) + return 1; + + node = rb_first(&he->hroot_out); + while (node) { + float percent; + + child = rb_entry(node, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + + if (!child->filtered && percent >= hb->min_pcnt) { + count++; + + if (include_children && child->unfolded) + count += hierarchy_count_rows(hb, child, true); + } + + node = rb_next(node); + } + return count; +} + static bool hist_entry__toggle_fold(struct hist_entry *he) { if (!he) @@ -302,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node, chain = 
list_entry(node->val.next, struct callchain_list, list); chain->has_children = has_sibling; - if (node->val.next != node->val.prev) { + if (!list_empty(&node->val)) { chain = list_entry(node->val.prev, struct callchain_list, list); chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); } @@ -326,11 +361,17 @@ static void callchain__init_have_children(struct rb_root *root) static void hist_entry__init_have_children(struct hist_entry *he) { - if (!he->init_have_children) { + if (he->init_have_children) + return; + + if (he->leaf) { he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); - he->init_have_children = true; + } else { + he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); } + + he->init_have_children = true; } static bool hist_browser__toggle_fold(struct hist_browser *browser) @@ -349,17 +390,49 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) has_children = callchain_list__toggle_fold(cl); if (has_children) { + int child_rows = 0; + hist_entry__init_have_children(he); browser->b.nr_entries -= he->nr_rows; - browser->nr_callchain_rows -= he->nr_rows; - if (he->unfolded) - he->nr_rows = callchain__count_rows(&he->sorted_chain); + if (he->leaf) + browser->nr_callchain_rows -= he->nr_rows; else + browser->nr_hierarchy_entries -= he->nr_rows; + + if (symbol_conf.report_hierarchy) + child_rows = hierarchy_count_rows(browser, he, true); + + if (he->unfolded) { + if (he->leaf) + he->nr_rows = callchain__count_rows(&he->sorted_chain); + else + he->nr_rows = hierarchy_count_rows(browser, he, false); + + /* account grand children */ + if (symbol_conf.report_hierarchy) + browser->b.nr_entries += child_rows - he->nr_rows; + + if (!he->leaf && he->nr_rows == 0) { + he->has_no_entry = true; + he->nr_rows = 1; + } + } else { + if (symbol_conf.report_hierarchy) + browser->b.nr_entries -= child_rows - he->nr_rows; + + if (he->has_no_entry) + he->has_no_entry = false; + he->nr_rows = 0; + } 
browser->b.nr_entries += he->nr_rows; - browser->nr_callchain_rows += he->nr_rows; + + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else + browser->nr_hierarchy_entries += he->nr_rows; return true; } @@ -422,13 +495,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold) return n; } -static void hist_entry__set_folding(struct hist_entry *he, bool unfold) +static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, + bool unfold __maybe_unused) +{ + float percent; + struct rb_node *nd; + struct hist_entry *child; + int n = 0; + + for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { + child = rb_entry(nd, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + if (!child->filtered && percent >= hb->min_pcnt) + n++; + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; if (he->has_children) { - int n = callchain__set_folding(&he->sorted_chain, unfold); + int n; + + if (he->leaf) + n = callchain__set_folding(&he->sorted_chain, unfold); + else + n = hierarchy_set_folding(hb, he, unfold); + he->nr_rows = unfold ? 
n : 0; } else he->nr_rows = 0; @@ -438,19 +536,38 @@ static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; - struct hists *hists = browser->hists; + struct hist_entry *he; + double percent; - for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { - struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - hist_entry__set_folding(he, unfold); - browser->nr_callchain_rows += he->nr_rows; + nd = rb_first(&browser->hists->entries); + while (nd) { + he = rb_entry(nd, struct hist_entry, rb_node); + + /* set folding state even if it's currently folded */ + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); + + hist_entry__set_folding(he, browser, unfold); + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + continue; + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } static void hist_browser__set_folding(struct hist_browser *browser, bool unfold) { + browser->nr_hierarchy_entries = 0; browser->nr_callchain_rows = 0; __hist_browser__set_folding(browser, unfold); @@ -657,9 +774,24 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, return 1; } +static bool check_percent_display(struct rb_node *node, u64 parent_total) +{ + struct callchain_node *child; + + if (node == NULL) + return false; + + if (rb_next(node)) + return true; + + child = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(child) != parent_total; +} + static int hist_browser__show_callchain_flat(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total, 
print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -669,7 +801,7 @@ static int hist_browser__show_callchain_flat(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -763,6 +895,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser, static int hist_browser__show_callchain_folded(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -772,7 +905,7 @@ static int hist_browser__show_callchain_folded(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -844,20 +977,24 @@ next: return row - first_row; } -static int hist_browser__show_callchain(struct hist_browser *browser, +static int hist_browser__show_callchain_graph(struct hist_browser *browser, struct rb_root *root, int level, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) { struct rb_node *node; int first_row = row, offset = level * LEVEL_OFFSET_STEP; - u64 new_total; bool need_percent; + u64 percent_total = total; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + percent_total = parent_total; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -878,7 +1015,7 @@ static int 
hist_browser__show_callchain(struct hist_browser *browser, folded_sign = callchain_list__folded(chain); row += hist_browser__show_callchain_list(browser, child, - chain, row, total, + chain, row, percent_total, was_first && need_percent, offset + extra_offset, print, arg); @@ -893,13 +1030,9 @@ static int hist_browser__show_callchain(struct hist_browser *browser, if (folded_sign == '-') { const int new_level = level + (extra_offset ? 2 : 1); - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total; - - row += hist_browser__show_callchain(browser, &child->rb_root, - new_level, row, new_total, + row += hist_browser__show_callchain_graph(browser, &child->rb_root, + new_level, row, total, + child->children_hit, print, arg, is_output_full); } if (is_output_full(browser, row)) @@ -910,6 +1043,45 @@ out: return row - first_row; } +static int hist_browser__show_callchain(struct hist_browser *browser, + struct hist_entry *entry, int level, + unsigned short row, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + u64 total = hists__total_period(entry->hists); + u64 parent_total; + int printed; + + if (symbol_conf.cumulate_callchain) + parent_total = entry->stat_acc->period; + else + parent_total = entry->stat.period; + + if (callchain_param.mode == CHAIN_FLAT) { + printed = hist_browser__show_callchain_flat(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else if (callchain_param.mode == CHAIN_FOLDED) { + printed = hist_browser__show_callchain_folded(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else { + printed = hist_browser__show_callchain_graph(browser, + &entry->sorted_chain, level, row, + total, parent_total, print, arg, + is_output_full); + } + + if (arg->is_current_entry) + browser->he_selection = entry; + + return printed; +} + struct hpp_arg { struct 
ui_browser *b; char folded_sign; @@ -1006,7 +1178,6 @@ static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { - char s[256]; int printed = 0; int width = browser->b.width; char folded_sign = ' '; @@ -1031,16 +1202,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, .folded_sign = folded_sign, .current_entry = current_entry, }; - struct perf_hpp hpp = { - .buf = s, - .size = sizeof(s), - .ptr = &arg, - }; int column = 0; hist_browser__gotorc(browser, row, 0); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + if (perf_hpp__should_skip(fmt, entry->hists) || column++ < browser->b.horiz_scroll) continue; @@ -1065,11 +1238,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (fmt->color) { - width -= fmt->color(fmt, &hpp, entry); + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); } else { - width -= fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry)); ui_browser__printf(&browser->b, "%s", s); } + width -= hpp.buf - s; } /* The scroll bar isn't being used */ @@ -1084,43 +1264,246 @@ static int hist_browser__show_entry(struct hist_browser *browser, --row_offset; if (folded_sign == '-' && row != browser->b.rows) { - u64 total = hists__total_period(entry->hists); struct callchain_print_arg arg = { .row_offset = row_offset, .is_current_entry = current_entry, }; - if (callchain_param.mode == CHAIN_GRAPH_REL) { - if (symbol_conf.cumulate_callchain) - total = entry->stat_acc->period; - else - total = entry->stat.period; - } - - if (callchain_param.mode == CHAIN_FLAT) { - printed += 
hist_browser__show_callchain_flat(browser, - &entry->sorted_chain, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); - } else if (callchain_param.mode == CHAIN_FOLDED) { - printed += hist_browser__show_callchain_folded(browser, - &entry->sorted_chain, row, total, + printed += hist_browser__show_callchain(browser, entry, 1, row, hist_browser__show_callchain_entry, &arg, hist_browser__check_output_full); + } + + return printed; +} + +static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *entry, + unsigned short row, + int level) +{ + int printed = 0; + int width = browser->b.width; + char folded_sign = ' '; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + off_t row_offset = entry->row_offset; + bool first = true; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + struct hpp_arg arg = { + .b = &browser->b, + .current_entry = current_entry, + }; + int column = 0; + int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; + + if (current_entry) { + browser->he_selection = entry; + browser->selection = &entry->ms; + } + + hist_entry__init_have_children(entry); + folded_sign = hist_entry__folded(entry); + arg.folded_sign = folded_sign; + + if (entry->leaf && row_offset) { + row_offset--; + goto show_callchain; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&entry->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, 
+ }; + + if (perf_hpp__should_skip(fmt, entry->hists) || + column++ < browser->b.horiz_scroll) + continue; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); } else { - printed += hist_browser__show_callchain(browser, - &entry->sorted_chain, 1, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); + } + + if (first) { + ui_browser__printf(&browser->b, "%c", folded_sign); + width--; + first = false; + } else { + ui_browser__printf(&browser->b, " "); + width -= 2; + } + + if (fmt->color) { + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); + } else { + int ret = fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + ui_browser__printf(&browser->b, "%s", s); + } + width -= hpp.buf - s; + } + + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + + if (column >= browser->b.horiz_scroll) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); + } else { + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); } - if (arg.is_current_entry) - browser->he_selection = entry; + perf_hpp_list__for_each_format(entry->hpp_list, fmt) { + ui_browser__write_nstring(&browser->b, "", 2); + width -= 2; + + /* + * No need to call hist_entry__snprintf_alignment() + * since this fmt is always the last column in the + * hierarchy mode. 
+ */ + if (fmt->color) { + width -= fmt->color(fmt, &hpp, entry); + } else { + int i = 0; + + width -= fmt->entry(fmt, &hpp, entry); + ui_browser__printf(&browser->b, "%s", ltrim(s)); + + while (isspace(s[i++])) + width++; + } + } + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + + ++row; + ++printed; + +show_callchain: + if (entry->leaf && folded_sign == '-' && row != browser->b.rows) { + struct callchain_print_arg carg = { + .row_offset = row_offset, + }; + + printed += hist_browser__show_callchain(browser, entry, + level + 1, row, + hist_browser__show_callchain_entry, &carg, + hist_browser__check_output_full); } return printed; } +static int hist_browser__show_no_entry(struct hist_browser *browser, + unsigned short row, int level) +{ + int width = browser->b.width; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool first = true; + int column = 0; + int ret; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + int indent = browser->hists->nr_hpp_node - 2; + + if (current_entry) { + browser->he_selection = NULL; + browser->selection = NULL; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&browser->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, browser->hists) || + column++ < browser->b.horiz_scroll) + continue; + + ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); + + if (first) { + /* for folded sign */ + first = false; + ret++; + } else { + /* 
space between columns */ + ret += 2; + } + + ui_browser__write_nstring(&browser->b, "", ret); + width -= ret; + } + + ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT); + width -= indent * HIERARCHY_INDENT; + + if (column >= browser->b.horiz_scroll) { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt); + ui_browser__printf(&browser->b, " %s", buf); + width -= ret + 2; + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + return 1; +} + static int advance_hpp_check(struct perf_hpp *hpp, int inc) { advance_hpp(hpp, inc); @@ -1144,7 +1527,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; @@ -1160,11 +1543,96 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } +static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size) +{ + struct hists *hists = browser->hists; + struct perf_hpp dummy_hpp = { + .buf = buf, + .size = size, + }; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + size_t ret = 0; + int column = 0; + int indent = hists->nr_hpp_node - 2; + bool first_node, first_col; + + ret = scnprintf(buf, size, " "); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (column++ < browser->b.horiz_scroll) + continue; + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); 
+ if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + indent * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / "); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + char *start; + + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+"); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + first_col = false; + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + dummy_hpp.buf[ret] = '\0'; + rtrim(dummy_hpp.buf); + + start = ltrim(dummy_hpp.buf); + ret = strlen(start); + + if (start != dummy_hpp.buf) + memmove(dummy_hpp.buf, start, ret + 1); + + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + } + + return ret; +} + static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); + if (symbol_conf.report_hierarchy) + hists_browser__scnprintf_hierarchy_headers(browser, headers, + sizeof(headers)); + else + hists_browser__scnprintf_headers(browser, headers, + sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); @@ -1196,18 +1664,34 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) hb->he_selection = NULL; hb->selection = NULL; - for (nd = browser->top; nd; nd = rb_next(nd)) { + for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; - if (h->filtered) + if (h->filtered) { + /* let it move to 
sibling */ + h->unfolded = false; continue; + } percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; - row += hist_browser__show_entry(hb, h, row); + if (symbol_conf.report_hierarchy) { + row += hist_browser__show_hierarchy_entry(hb, h, row, + h->depth); + if (row == browser->rows) + break; + + if (h->has_no_entry) { + hist_browser__show_no_entry(hb, row, h->depth + 1); + row++; + } + } else { + row += hist_browser__show_entry(hb, h, row); + } + if (row == browser->rows) break; } @@ -1225,7 +1709,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, if (!h->filtered && percent >= min_pcnt) return nd; - nd = rb_next(nd); + /* + * If it's filtered, its all children also were filtered. + * So move to sibling node. + */ + if (rb_next(nd)) + nd = rb_next(nd); + else + nd = rb_hierarchy_next(nd); } return NULL; @@ -1241,7 +1732,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, if (!h->filtered && percent >= min_pcnt) return nd; - nd = rb_prev(nd); + nd = rb_hierarchy_prev(nd); } return NULL; @@ -1271,8 +1762,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser, nd = browser->top; goto do_offset; case SEEK_END: - nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->min_pcnt); + nd = rb_hierarchy_last(rb_last(browser->entries)); + nd = hists__filter_prev_entries(nd, hb->min_pcnt); first = false; break; default: @@ -1306,7 +1797,7 @@ do_offset: if (offset > 0) { do { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) { + if (h->unfolded && h->leaf) { u16 remaining = h->nr_rows - h->row_offset; if (offset > remaining) { offset -= remaining; @@ -1318,7 +1809,8 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); + nd = hists__filter_entries(rb_hierarchy_next(nd), + hb->min_pcnt); if (nd == NULL) break; --offset; @@ -1327,7 +1819,7 @@ do_offset: } else if (offset < 0) { while (1) { h = rb_entry(nd, struct hist_entry, rb_node); - if 
(h->unfolded) { + if (h->unfolded && h->leaf) { if (first) { if (-offset > h->row_offset) { offset += h->row_offset; @@ -1351,7 +1843,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), + nd = hists__filter_prev_entries(rb_hierarchy_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1364,7 +1856,7 @@ do_offset: * row_offset at its last entry. */ h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) + if (h->unfolded && h->leaf) h->row_offset = h->nr_rows; break; } @@ -1378,17 +1870,14 @@ do_offset: } static int hist_browser__fprintf_callchain(struct hist_browser *browser, - struct hist_entry *he, FILE *fp) + struct hist_entry *he, FILE *fp, + int level) { - u64 total = hists__total_period(he->hists); struct callchain_print_arg arg = { .fp = fp, }; - if (symbol_conf.cumulate_callchain) - total = he->stat_acc->period; - - hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total, + hist_browser__show_callchain(browser, he, level, 0, hist_browser__fprintf_callchain_entry, &arg, hist_browser__check_dump_full); return arg.printed; @@ -1414,7 +1903,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -1425,12 +1914,71 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, first = false; ret = fmt->entry(fmt, &hpp, he); + ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret); advance_hpp(&hpp, ret); } - printed += fprintf(fp, "%s\n", rtrim(s)); + printed += fprintf(fp, "%s\n", s); if (folded_sign == '-') - printed += hist_browser__fprintf_callchain(browser, he, fp); + printed += hist_browser__fprintf_callchain(browser, he, fp, 1); + + return printed; +} + + +static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *he, + FILE *fp, int level) 
+{ + char s[8192]; + int printed = 0; + char folded_sign = ' '; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + bool first = true; + int ret; + int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; + + printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); + + folded_sign = hist_entry__folded(he); + printed += fprintf(fp, "%c", folded_sign); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&he->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (!first) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + } else + first = false; + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } + + ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, ""); + advance_hpp(&hpp, ret); + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } + + printed += fprintf(fp, "%s\n", rtrim(s)); + + if (he->leaf && folded_sign == '-') { + printed += hist_browser__fprintf_callchain(browser, he, fp, + he->depth + 1); + } return printed; } @@ -1444,8 +1992,16 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); + if (symbol_conf.report_hierarchy) { + printed += hist_browser__fprintf_hierarchy_entry(browser, + h, fp, + h->depth); + } else { + printed += hist_browser__fprintf_entry(browser, h, fp); + } + + nd = hists__filter_entries(rb_hierarchy_next(nd), + browser->min_pcnt); } return printed; @@ -1580,11 +2136,18 @@ static int hists__browser_title(struct hists *hists, if (hists->uid_filter_str) 
printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); - if (thread) - printed += scnprintf(bf + printed, size - printed, + if (thread) { + if (sort__has_thread) { + printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid); + } else { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s", + (thread->comm_set ? thread__comm_str(thread) : "")); + } + } if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); @@ -1759,15 +2322,24 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + return 0; + if (browser->hists->thread_filter) { pstack__remove(browser->pstack, &browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid); + if (sort__has_thread) { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"", + thread->comm_set ? thread__comm_str(thread) : ""); + } + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(browser->pstack, &browser->hists->thread_filter); @@ -1782,13 +2354,22 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (thread == NULL) + int ret; + + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) return 0; - if (asprintf(optstr, "Zoom %s %s(%d) thread", - browser->hists->thread_filter ? 
"out of" : "into", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid) < 0) + if (sort__has_thread) { + ret = asprintf(optstr, "Zoom %s %s(%d) thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ret = asprintf(optstr, "Zoom %s %s thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : ""); + } + if (ret < 0) return 0; act->thread = thread; @@ -1801,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; + if (!sort__has_dso || map == NULL) + return 0; + if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); perf_hpp__set_elide(HISTC_DSO, false); @@ -1825,7 +2409,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -1850,7 +2434,7 @@ static int add_map_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) @@ -1952,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { + if (!sort__has_socket || act->socket < 0) + return 0; + if (browser->hists->socket_filter > -1) { pstack__remove(browser->pstack, &browser->hists->socket_filter); browser->hists->socket_filter = -1; @@ -1971,7 +2558,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (socket_id < 0) + if (!sort__has_socket || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", @@ -1989,17 +2576,60 @@ static void 
hist_browser__update_nr_entries(struct hist_browser *hb) u64 nr_entries = 0; struct rb_node *nd = rb_first(&hb->hists->entries); - if (hb->min_pcnt == 0) { + if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; return; } while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; - nd = rb_next(nd); + nd = rb_hierarchy_next(nd); } hb->nr_non_filtered_entries = nr_entries; + hb->nr_hierarchy_entries = nr_entries; +} + +static void hist_browser__update_percent_limit(struct hist_browser *hb, + double percent) +{ + struct hist_entry *he; + struct rb_node *nd = rb_first(&hb->hists->entries); + u64 total = hists__total_period(hb->hists); + u64 min_callchain_hits = total * (percent / 100); + + hb->min_pcnt = callchain_param.min_percent = percent; + + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { + he = rb_entry(nd, struct hist_entry, rb_node); + + if (he->has_no_entry) { + he->has_no_entry = false; + he->nr_rows = 0; + } + + if (!he->leaf || !symbol_conf.use_callchain) + goto next; + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + +next: + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); + + /* force to re-evaluate folding state of callchains */ + he->init_have_children = false; + hist_entry__set_folding(he, hb, false); + } } static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, @@ -2037,6 +2667,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "L Change percent limit\n" \ "m Display context menu\n" \ "S Zoom into current Processor Socket\n" \ @@ -2077,7 
+2708,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { perf_hpp__reset_width(fmt, hists); /* * This is done just once, and activates the horizontal scrolling @@ -2192,6 +2823,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, top->zero = !top->zero; } continue; + case 'L': + if (ui_browser__input_window("Percent Limit", + "Please enter the value you want to hide entries under that percent.", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + char *end; + double new_percent = strtod(buf, &end); + + if (new_percent < 0 || new_percent > 100) { + ui_browser__warning(&browser->b, delay_secs * 2, + "Invalid percent: %.2f", new_percent); + continue; + } + + hist_browser__update_percent_limit(browser, new_percent); + hist_browser__reset(browser); + } + continue; case K_F1: case 'h': case '?': @@ -2263,10 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym) - goto add_exit_option; - - if (browser->selection == NULL) + if (!sort__has_sym || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2306,11 +2952,16 @@ skip_annotation: &options[nr_options], socked_id); /* perf script support */ + if (!is_report_browser(hbt)) + goto skip_scripting; + if (browser->he_selection) { - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - thread, NULL); + if (sort__has_thread && thread) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + thread, NULL); + } /* * Note that browser->selection != NULL * when browser->he_selection is not NULL, @@ -2320,16 +2971,18 @@ skip_annotation: * * See hist_browser__show_entry. 
*/ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); -add_exit_option: +skip_scripting: nr_options += add_exit_opt(browser, &actions[nr_options], &options[nr_options]); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 0f8dcfdfb10f..2aa45b606fa4 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -55,7 +55,7 @@ static u64 he_get_acc_##_field(struct hist_entry *he) \ return he->stat_acc->_field; \ } \ \ -static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ @@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, nr_cols = 0; - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) col_types[nr_cols++] = G_TYPE_STRING; store = gtk_tree_store_newv(nr_cols, col_types); @@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, h->hists)) continue; @@ -396,6 +396,194 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_container_add(GTK_CONTAINER(window), view); } +static void 
perf_gtk__add_hierarchy_entries(struct hists *hists, + struct rb_root *root, + GtkTreeStore *store, + GtkTreeIter *parent, + struct perf_hpp *hpp, + float min_pcnt) +{ + int col_idx = 0; + struct rb_node *node; + struct hist_entry *he; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + u64 total = hists__total_period(hists); + int size; + + for (node = rb_first(root); node; node = rb_next(node)) { + GtkTreeIter iter; + float percent; + char *bf; + + he = rb_entry(node, struct hist_entry, rb_node); + if (he->filtered) + continue; + + percent = hist_entry__get_percent_limit(he); + if (percent < min_pcnt) + continue; + + gtk_tree_store_append(store, &iter, parent); + + col_idx = 0; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1); + } + + bf = hpp->buf; + size = hpp->size; + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + int ret; + + if (fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + snprintf(hpp->buf + ret, hpp->size - ret, " "); + advance_hpp(hpp, ret + 2); + } + + gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); + + if (!he->leaf) { + hpp->buf = bf; + hpp->size = size; + + perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, + store, &iter, hpp, + min_pcnt); + + if (!hist_entry__has_hierarchy_children(he, min_pcnt)) { + char buf[32]; + GtkTreeIter child; + + snprintf(buf, sizeof(buf), "no entry >= %.2f%%", + min_pcnt); + + gtk_tree_store_append(store, &child, &iter); + gtk_tree_store_set(store, &child, col_idx, buf, -1); + } + } + + if (symbol_conf.use_callchain && he->leaf) { + if (callchain_param.mode == CHAIN_GRAPH_REL) + total = symbol_conf.cumulate_callchain ? 
+ he->stat_acc->period : he->stat.period; + + perf_gtk__add_callchain(&he->sorted_chain, store, &iter, + col_idx, total); + } + } + +} + +static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, + float min_pcnt) +{ + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + GType col_types[MAX_COLUMNS]; + GtkCellRenderer *renderer; + GtkTreeStore *store; + GtkWidget *view; + int col_idx; + int nr_cols = 0; + char s[512]; + char buf[512]; + bool first_node, first_col; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + col_types[nr_cols++] = G_TYPE_STRING; + } + col_types[nr_cols++] = G_TYPE_STRING; + + store = gtk_tree_store_newv(nr_cols, col_types); + view = gtk_tree_view_new(); + renderer = gtk_cell_renderer_text_new(); + + col_idx = 0; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, fmt->name, + renderer, "markup", + col_idx++, NULL); + } + + /* construct merged column header since sort keys share single column */ + buf[0] = '\0'; + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) + strcat(buf, " / "); + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + strcat(buf, "+"); + first_col = false; + + fmt->header(fmt, &hpp, hists_to_evsel(hists)); + strcat(buf, ltrim(rtrim(hpp.buf))); + } + } + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, buf, + renderer, "markup", + col_idx++, NULL); + + for (col_idx = 0; col_idx < nr_cols; col_idx++) { + GtkTreeViewColumn *column; + + 
column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx); + gtk_tree_view_column_set_resizable(column, TRUE); + + if (col_idx == 0) { + gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view), + column); + } + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + g_object_unref(GTK_TREE_MODEL(store)); + + perf_gtk__add_hierarchy_entries(hists, &hists->entries, store, + NULL, &hpp, min_pcnt); + + gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE); + + g_signal_connect(view, "row-activated", + G_CALLBACK(on_row_activated), NULL); + gtk_container_add(GTK_CONTAINER(window), view); +} + int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt __maybe_unused, @@ -463,7 +651,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, GTK_POLICY_AUTOMATIC, GTK_POLICY_AUTOMATIC); - perf_gtk__show_hists(scrolled_window, hists, min_pcnt); + if (symbol_conf.report_hierarchy) + perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt); + else + perf_gtk__show_hists(scrolled_window, hists, min_pcnt); tab_label = gtk_label_new(evname); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index bf2a66e254ea..3baeaa6e71b5 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -5,6 +5,7 @@ #include "../util/util.h" #include "../util/sort.h" #include "../util/evsel.h" +#include "../util/evlist.h" /* hist period print (hpp) functions */ @@ -371,7 +372,20 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } -#define HPP__COLOR_PRINT_FNS(_name, _fn) \ +static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) +{ + return a->header == hpp__header_fn; +} + +static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b)) + return false; + + return a->idx == b->idx; +} + +#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -381,9 
+395,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn) \ +#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -393,9 +409,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__PRINT_FNS(_name, _fn) \ +#define HPP__PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -404,22 +422,25 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } struct perf_hpp_fmt perf_hpp__format[] = { - HPP__COLOR_PRINT_FNS("Overhead", overhead), - HPP__COLOR_PRINT_FNS("sys", overhead_sys), - HPP__COLOR_PRINT_FNS("usr", overhead_us), - HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys), - HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us), - HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc), - HPP__PRINT_FNS("Samples", samples), - HPP__PRINT_FNS("Period", period) + HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD), + HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS), + HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US), + HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS), + HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US), + HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC), + HPP__PRINT_FNS("Samples", samples, SAMPLES), + HPP__PRINT_FNS("Period", period, PERIOD) }; -LIST_HEAD(perf_hpp__list); -LIST_HEAD(perf_hpp__sort_list); - +struct perf_hpp_list perf_hpp_list = { + .fields = 
LIST_HEAD_INIT(perf_hpp_list.fields), + .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts), +}; #undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_ACC_PRINT_FNS @@ -485,63 +506,60 @@ void perf_hpp__init(void) hpp_dimension__add_output(PERF_HPP__PERIOD); } -void perf_hpp__column_register(struct perf_hpp_fmt *format) +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_add_tail(&format->list, &perf_hpp__list); + list_add_tail(&format->list, &list->fields); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_del(&format->list); + list_add_tail(&format->sort_list, &list->sorts); } -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) -{ - list_add_tail(&format->sort_list, &perf_hpp__sort_list); -} - -void perf_hpp__column_enable(unsigned col) -{ - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_register(&perf_hpp__format[col]); -} - -void perf_hpp__column_disable(unsigned col) +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_unregister(&perf_hpp__format[col]); + list_del(&format->list); } void perf_hpp__cancel_cumulate(void) { + struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp; + if (is_strict_order(field_order)) return; - perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); - perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead"; + ovh = &perf_hpp__format[PERF_HPP__OVERHEAD]; + acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC]; + + perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { + if (acc->equal(acc, fmt)) { + perf_hpp__column_unregister(fmt); + continue; + } + + if (ovh->equal(ovh, fmt)) + fmt->name = "Overhead"; + } } -void perf_hpp__setup_output_field(void) +static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + return a->equal && a->equal(a, b); +} + +void 
perf_hpp__setup_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append sort keys to output field */ - perf_hpp__for_each_sort_list(fmt) { - if (!list_empty(&fmt->list)) - continue; - - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. - */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; + perf_hpp_list__for_each_sort_list(list, fmt) { + struct perf_hpp_fmt *pos; - perf_hpp__for_each_format(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_format(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__column_register(fmt); @@ -550,27 +568,17 @@ next: } } -void perf_hpp__append_sort_keys(void) +void perf_hpp__append_sort_keys(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append output fields to sort keys */ - perf_hpp__for_each_format(fmt) { - if (!list_empty(&fmt->sort_list)) - continue; - - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. 
- */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; + perf_hpp_list__for_each_format(list, fmt) { + struct perf_hpp_fmt *pos; - perf_hpp__for_each_sort_list(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_sort_list(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__register_sort_field(fmt); @@ -579,20 +587,29 @@ next: } } -void perf_hpp__reset_output_field(void) + +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + if (fmt->free) + fmt->free(fmt); +} + +void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; /* reset output fields */ - perf_hpp__for_each_format_safe(fmt, tmp) { + perf_hpp_list__for_each_format_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } /* reset sort keys */ - perf_hpp__for_each_sort_list_safe(fmt, tmp) { + perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } } @@ -606,7 +623,7 @@ unsigned int hists__sort_list_width(struct hists *hists) bool first = true; struct perf_hpp dummy_hpp; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -624,22 +641,39 @@ unsigned int hists__sort_list_width(struct hists *hists) return ret; } -void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) +unsigned int hists__overhead_width(struct hists *hists) { - int idx; - - if (perf_hpp__is_sort_entry(fmt)) - return perf_hpp__reset_sort_width(fmt, hists); + struct perf_hpp_fmt *fmt; + int ret = 0; + bool first = true; + struct perf_hpp dummy_hpp; - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - if (fmt == &perf_hpp__format[idx]) + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) break; + + if (first) + first = false; + else + ret += 2; + + ret += fmt->width(fmt, 
&dummy_hpp, hists_to_evsel(hists)); } - if (idx == PERF_HPP__MAX_INDEX) + return ret; +} + +void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) +{ + if (perf_hpp__is_sort_entry(fmt)) + return perf_hpp__reset_sort_width(fmt, hists); + + if (perf_hpp__is_dynamic_entry(fmt)) return; - switch (idx) { + BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX); + + switch (fmt->idx) { case PERF_HPP__OVERHEAD: case PERF_HPP__OVERHEAD_SYS: case PERF_HPP__OVERHEAD_US: @@ -667,7 +701,7 @@ void perf_hpp__set_user_width(const char *width_list_str) struct perf_hpp_fmt *fmt; const char *ptr = width_list_str; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { char *p; int len = strtol(ptr, &p, 10); @@ -679,3 +713,71 @@ void perf_hpp__set_user_width(const char *width_list_str) break; } } + +static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_list_node *node = NULL; + struct perf_hpp_fmt *fmt_copy; + bool found = false; + bool skip = perf_hpp__should_skip(fmt, hists); + + list_for_each_entry(node, &hists->hpp_formats, list) { + if (node->level == fmt->level) { + found = true; + break; + } + } + + if (!found) { + node = malloc(sizeof(*node)); + if (node == NULL) + return -1; + + node->skip = skip; + node->level = fmt->level; + perf_hpp_list__init(&node->hpp); + + hists->nr_hpp_node++; + list_add_tail(&node->list, &hists->hpp_formats); + } + + fmt_copy = perf_hpp_fmt__dup(fmt); + if (fmt_copy == NULL) + return -1; + + if (!skip) + node->skip = false; + + list_add_tail(&fmt_copy->list, &node->hpp.fields); + list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts); + + return 0; +} + +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct perf_hpp_fmt *fmt; + struct hists *hists; + int ret; + + if (!symbol_conf.report_hierarchy) + return 0; + + evlist__for_each(evlist, evsel) { + hists = evsel__hists(evsel); + + 
perf_hpp_list__for_each_sort_list(list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + + ret = add_hierarchy_fmt(hists, fmt); + if (ret < 0) + return ret; + } + } + + return 0; +} diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 387110d50b00..7aff5acf3265 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -165,8 +165,28 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, return ret; } +/* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + * + * However when percent-limit applied, it's possible that single callchain + * node have different (non-100% in fractal mode) percentage. + */ +static bool need_percent_display(struct rb_node *node, u64 parent_samples) +{ + struct callchain_node *cnode; + + if (rb_next(node)) + return true; + + cnode = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(cnode) != parent_samples; +} + static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) + u64 total_samples, u64 parent_samples, + int left_margin) { struct callchain_node *cnode; struct callchain_list *chain; @@ -177,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, int ret = 0; char bf[1024]; - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. 
- */ node = rb_first(root); - if (node && !rb_next(node)) { + if (node && !need_percent_display(node, parent_samples)) { cnode = rb_entry(node, struct callchain_node, rb_node); list_for_each_entry(chain, &cnode->val, list) { /* @@ -213,9 +228,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, root = &cnode->rb_root; } + if (callchain_param.mode == CHAIN_GRAPH_REL) + total_samples = parent_samples; + ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); - ret += fprintf(fp, "\n"); + if (ret) { + /* do not add a blank line if it printed nothing */ + ret += fprintf(fp, "\n"); + } return ret; } @@ -323,16 +344,19 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, u64 total_samples, int left_margin, FILE *fp) { + u64 parent_samples = he->stat.period; + + if (symbol_conf.cumulate_callchain) + parent_samples = he->stat_acc->period; + switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, - symbol_conf.cumulate_callchain ? 
- he->stat_acc->period : he->stat.period, - left_margin); + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + parent_samples, left_margin); break; case CHAIN_GRAPH_ABS: return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); + parent_samples, left_margin); break; case CHAIN_FLAT: return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); @@ -349,45 +373,66 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static size_t hist_entry__callchain_fprintf(struct hist_entry *he, - struct hists *hists, - FILE *fp) +static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) { - int left_margin = 0; - u64 total_period = hists->stats.total_period; + const char *sep = symbol_conf.field_sep; + struct perf_hpp_fmt *fmt; + char *start = hpp->buf; + int ret; + bool first = true; - if (field_order == NULL && (sort_order == NULL || - !prefixcmp(sort_order, "comm"))) { - struct perf_hpp_fmt *fmt; + if (symbol_conf.exclude_other && !he->parent) + return 0; - perf_hpp__for_each_format(fmt) { - if (!perf_hpp__is_sort_entry(fmt)) - continue; + hists__for_each_format(he->hists, fmt) { + if (perf_hpp__should_skip(fmt, he->hists)) + continue; - /* must be 'comm' sort entry */ - left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists)); - left_margin -= thread__comm_len(he->thread); - break; - } + /* + * If there's no field_sep, we still need + * to display initial ' '. 
+ */ + if (!sep || !first) { + ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); + advance_hpp(hpp, ret); + } else + first = false; + + if (perf_hpp__use_color() && fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); + advance_hpp(hpp, ret); } - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); + + return hpp->buf - start; } -static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) +static int hist_entry__hierarchy_fprintf(struct hist_entry *he, + struct perf_hpp *hpp, + struct hists *hists, + FILE *fp) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; - char *start = hpp->buf; - int ret; + struct perf_hpp_list_node *fmt_node; + char *buf = hpp->buf; + size_t size = hpp->size; + int ret, printed = 0; bool first = true; if (symbol_conf.exclude_other && !he->parent) return 0; - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt, he->hists)) - continue; + ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, ""); + advance_hpp(hpp, ret); + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { /* * If there's no field_sep, we still need * to display initial ' '. 
@@ -403,10 +448,47 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) else ret = fmt->entry(fmt, hpp, he); + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); advance_hpp(hpp, ret); } - return hpp->buf - start; + if (!sep) + ret = scnprintf(hpp->buf, hpp->size, "%*s", + (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, ""); + advance_hpp(hpp, ret); + + printed += fprintf(fp, "%s", buf); + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + hpp->buf = buf; + hpp->size = size; + + /* + * No need to call hist_entry__snprintf_alignment() since this + * fmt is always the last column in the hierarchy mode. + */ + if (perf_hpp__use_color() && fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + /* + * dynamic entries are right-aligned but we want left-aligned + * in the hierarchy mode + */ + printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); + } + printed += putc('\n', fp); + + if (symbol_conf.use_callchain && he->leaf) { + u64 total = hists__total_period(hists); + + printed += hist_entry_callchain__fprintf(he, total, 0, fp); + goto out; + } + +out: + return printed; } static int hist_entry__fprintf(struct hist_entry *he, size_t size, @@ -418,24 +500,134 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; + u64 total_period = hists->stats.total_period; if (size == 0 || size > bfsz) size = hpp.size = bfsz; + if (symbol_conf.report_hierarchy) + return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); if (symbol_conf.use_callchain) - ret += hist_entry__callchain_fprintf(he, hists, fp); + ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); return ret; } +static int print_hierarchy_indent(const char *sep, int indent, + const char *line, FILE *fp) +{ + if (sep != NULL || indent < 2) + return 0; + + return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); +} + +static int 
print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, + const char *sep, FILE *fp) +{ + bool first_node, first_col; + int indent; + int depth; + unsigned width = 0; + unsigned header_width = 0; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + + indent = hists->nr_hpp_node; + + /* preserve max indent depth for column headers */ + print_hierarchy_indent(sep, indent, spaces, fp); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + fmt->header(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%s%s", hpp->buf, sep ?: " "); + } + + /* combine sort headers with ' / ' */ + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) + header_width += fprintf(fp, " / "); + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + header_width += fprintf(fp, "+"); + first_col = false; + + fmt->header(fmt, hpp, hists_to_evsel(hists)); + rtrim(hpp->buf); + + header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + } + } + + fprintf(fp, "\n# "); + + /* preserve max indent depth for initial dots */ + print_hierarchy_indent(sep, indent, dots, fp); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (!first_col) + fprintf(fp, "%s", sep ?: ".."); + first_col = false; + + width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%.*s", width, dots); + } + + depth = 0; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + first_col = true; + width = depth * HIERARCHY_INDENT; + + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if 
(perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + width++; /* for '+' sign between column header */ + first_col = false; + + width += fmt->width(fmt, hpp, hists_to_evsel(hists)); + } + + if (width > header_width) + header_width = width; + + depth++; + } + + fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots); + + fprintf(fp, "\n#\n"); + + return 2; +} + size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; struct rb_node *nd; size_t ret = 0; unsigned int width; @@ -449,10 +641,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, bool first = true; size_t linesz; char *line = NULL; + unsigned indent; init_rem_hits(); - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) perf_hpp__reset_width(fmt, hists); if (symbol_conf.col_width_list_str) @@ -463,7 +656,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + if (symbol_conf.report_hierarchy) { + list_for_each_entry(fmt_node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } + nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp); + goto print_entries; + } + + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -487,7 +689,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { unsigned int i; if (perf_hpp__should_skip(fmt, hists)) @@ -520,7 +722,9 @@ print_entries: goto out; } - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + indent = hists__overhead_width(hists) + 4; + + for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, 
struct hist_entry, rb_node); float percent; @@ -536,6 +740,20 @@ print_entries: if (max_rows && ++nr_rows >= max_rows) break; + /* + * If all children are filtered out or percent-limited, + * display "no entry >= x.xx%" message. + */ + if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { + int depth = hists->nr_hpp_node + h->depth + 1; + + print_hierarchy_indent(sep, depth, spaces, fp); + fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); + + if (max_rows && ++nr_rows >= max_rows) + break; + } + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(h->thread->mg, MAP__FUNCTION, fp); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5eec53a3f4ac..da48fd843438 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += abspath.o libperf-y += alias.o libperf-y += annotate.o libperf-y += build-id.o @@ -82,6 +81,7 @@ libperf-y += parse-branch-options.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o +libperf-y += mem-events.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -105,8 +105,17 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o +libperf-y += demangle-java.o + +ifdef CONFIG_JITDUMP +libperf-$(CONFIG_LIBELF) += jitdump.o +libperf-$(CONFIG_LIBELF) += genelf.o +libperf-$(CONFIG_LIBELF) += genelf_debug.o +endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +# avoid compiler warnings in 32-bit mode +CFLAGS_genelf_debug.o += -Wno-packed $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c deleted file mode 100644 index 0e76affe9c36..000000000000 --- a/tools/perf/util/abspath.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "cache.h" - -static const char *get_pwd_cwd(void) -{ - static char cwd[PATH_MAX + 1]; - 
char *pwd; - struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) - return NULL; - pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); - if (!stat(pwd, &pwd_stat) && - pwd_stat.st_dev == cwd_stat.st_dev && - pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); - } - } - return cwd; -} - -const char *make_nonrelative_path(const char *path) -{ - static char buf[PATH_MAX + 1]; - - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } else { - const char *cwd = get_pwd_cwd(); - if (!cwd) - die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } - return buf; -} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cea323d9ee7e..9241f8c2b7e1 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); int hist_entry__annotate(struct hist_entry *he, size_t privsize); -int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym); +int symbol__annotate_init(struct map *map, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 360fda01f3b0..ec164fe70718 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap) heap_array[last].ordinal); } -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr) +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist) { if (itr) - return itr->info_priv_size(itr); + return itr->info_priv_size(itr, evlist); return 0; } @@ -852,7 +853,7 @@ int 
perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, int err; pr_debug2("Synthesizing auxtrace information\n"); - priv_size = auxtrace_record__info_priv_size(itr); + priv_size = auxtrace_record__info_priv_size(itr, session->evlist); ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size); if (!ev) return -ENOMEM; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b86f90db1352..57ff31ecb8e4 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -293,7 +293,8 @@ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); - size_t (*info_priv_size)(struct auxtrace_record *itr); + size_t (*info_priv_size)(struct auxtrace_record *itr, + struct perf_evlist *evlist); int (*info_fill)(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_record__options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr); +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist); int auxtrace_record__info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -515,7 +517,7 @@ static inline void auxtrace__free(struct perf_session *session) static inline struct auxtrace_record * auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, - int *err __maybe_unused) + int *err) { *err = 0; return NULL; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 540a7efa657e..0967ce601931 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -7,6 +7,7 @@ #include <linux/bpf.h> #include <bpf/libbpf.h> +#include <bpf/bpf.h> #include <linux/err.h> #include <linux/string.h> 
#include "perf.h" @@ -16,6 +17,7 @@ #include "llvm-utils.h" #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES +#include "parse-events.h" #include "llvm-utils.h" #define DEFINE_PRINT_FN(name, level) \ @@ -108,8 +110,8 @@ void bpf__clear(void) } static void -bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused, - void *_priv) +clear_prog_priv(struct bpf_program *prog __maybe_unused, + void *_priv) { struct bpf_prog_priv *priv = _priv; @@ -337,7 +339,7 @@ config_bpf_program(struct bpf_program *prog) } pr_debug("bpf: config '%s' is ok\n", config_str); - err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear); + err = bpf_program__set_private(prog, priv, clear_prog_priv); if (err) { pr_debug("Failed to set priv for program '%s'\n", config_str); goto errout; @@ -739,6 +741,682 @@ int bpf__foreach_tev(struct bpf_object *obj, return 0; } +enum bpf_map_op_type { + BPF_MAP_OP_SET_VALUE, + BPF_MAP_OP_SET_EVSEL, +}; + +enum bpf_map_key_type { + BPF_MAP_KEY_ALL, + BPF_MAP_KEY_RANGES, +}; + +struct bpf_map_op { + struct list_head list; + enum bpf_map_op_type op_type; + enum bpf_map_key_type key_type; + union { + struct parse_events_array array; + } k; + union { + u64 value; + struct perf_evsel *evsel; + } v; +}; + +struct bpf_map_priv { + struct list_head ops_list; +}; + +static void +bpf_map_op__delete(struct bpf_map_op *op) +{ + if (!list_empty(&op->list)) + list_del(&op->list); + if (op->key_type == BPF_MAP_KEY_RANGES) + parse_events__clear_array(&op->k.array); + free(op); +} + +static void +bpf_map_priv__purge(struct bpf_map_priv *priv) +{ + struct bpf_map_op *pos, *n; + + list_for_each_entry_safe(pos, n, &priv->ops_list, list) { + list_del_init(&pos->list); + bpf_map_op__delete(pos); + } +} + +static void +bpf_map_priv__clear(struct bpf_map *map __maybe_unused, + void *_priv) +{ + struct bpf_map_priv *priv = _priv; + + bpf_map_priv__purge(priv); + free(priv); +} + +static int +bpf_map_op_setkey(struct bpf_map_op *op, struct 
parse_events_term *term) +{ + op->key_type = BPF_MAP_KEY_ALL; + if (!term) + return 0; + + if (term->array.nr_ranges) { + size_t memsz = term->array.nr_ranges * + sizeof(op->k.array.ranges[0]); + + op->k.array.ranges = memdup(term->array.ranges, memsz); + if (!op->k.array.ranges) { + pr_debug("No enough memory to alloc indices for map\n"); + return -ENOMEM; + } + op->key_type = BPF_MAP_KEY_RANGES; + op->k.array.nr_ranges = term->array.nr_ranges; + } + return 0; +} + +static struct bpf_map_op * +bpf_map_op__new(struct parse_events_term *term) +{ + struct bpf_map_op *op; + int err; + + op = zalloc(sizeof(*op)); + if (!op) { + pr_debug("Failed to alloc bpf_map_op\n"); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&op->list); + + err = bpf_map_op_setkey(op, term); + if (err) { + free(op); + return ERR_PTR(err); + } + return op; +} + +static int +bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) +{ + struct bpf_map_priv *priv; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("Failed to get private from map %s\n", map_name); + return err; + } + + if (!priv) { + priv = zalloc(sizeof(*priv)); + if (!priv) { + pr_debug("No enough memory to alloc map private\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&priv->ops_list); + + if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) { + free(priv); + return -BPF_LOADER_ERRNO__INTERNAL; + } + } + + list_add_tail(&op->list, &priv->ops_list); + return 0; +} + +static struct bpf_map_op * +bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term) +{ + struct bpf_map_op *op; + int err; + + op = bpf_map_op__new(term); + if (IS_ERR(op)) + return op; + + err = bpf_map__add_op(map, op); + if (err) { + bpf_map_op__delete(op); + return ERR_PTR(err); + } + return op; +} + +static int +__bpf_map__config_value(struct bpf_map *map, + struct parse_events_term *term) +{ + struct bpf_map_def def; + struct bpf_map_op *op; + 
const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (def.type != BPF_MAP_TYPE_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + if (def.key_size < sizeof(unsigned int)) { + pr_debug("Map %s has incorrect key size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; + } + switch (def.value_size) { + case 1: + case 2: + case 4: + case 8: + break; + default: + pr_debug("Map %s has incorrect value size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + + op = bpf_map__add_newop(map, term); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_VALUE; + op->v.value = term->val.num; + return 0; +} + +static int +bpf_map__config_value(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist __maybe_unused) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { + pr_debug("ERROR: wrong value type for 'value'\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_value(map, term); +} + +static int +__bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct bpf_map_def def; + struct bpf_map_op *op; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); + if (!evsel) { + pr_debug("Event (for '%s') '%s' doesn't exist\n", + map_name, term->val.str); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return err; 
+ } + + /* + * No need to check key_size and value_size: + * kernel has already checked them. + */ + if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + + op = bpf_map__add_newop(map, term); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_EVSEL; + op->v.evsel = evsel; + return 0; +} + +static int +bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) { + pr_debug("ERROR: wrong value type for 'event'\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_event(map, term, evlist); +} + +struct bpf_obj_config__map_func { + const char *config_opt; + int (*config_func)(struct bpf_map *, struct parse_events_term *, + struct perf_evlist *); +}; + +struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { + {"value", bpf_map__config_value}, + {"event", bpf_map__config_event}, +}; + +static int +config_map_indices_range_check(struct parse_events_term *term, + struct bpf_map *map, + const char *map_name) +{ + struct parse_events_array *array = &term->array; + struct bpf_map_def def; + unsigned int i; + int err; + + if (!array->nr_ranges) + return 0; + if (!array->ranges) { + pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n", + map_name, (int)array->nr_ranges); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + for (i = 0; i < array->nr_ranges; i++) { + unsigned int start = array->ranges[i].start; + size_t length = array->ranges[i].length; + unsigned int idx = start + length - 1; /* last index covered by this range */ + + if (idx 
>= def.max_entries) { + pr_debug("ERROR: index %u too large\n", idx); /* idx is unsigned: %u keeps a wrapped value from printing as negative */ + return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; + } + } + return 0; +} + +static int +bpf__obj_config_map(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *key_scan_pos) +{ + /* key is "map:<mapname>.<config opt>"; a private copy is split at the '.' and the option is dispatched through bpf_obj_config__map_funcs */ + char *map_name = strdup(term->config + sizeof("map:") - 1); + struct bpf_map *map; + int err = -BPF_LOADER_ERRNO__OBJCONF_OPT; + char *map_opt; + size_t i; + + if (!map_name) + return -ENOMEM; + + map_opt = strchr(map_name, '.'); + if (!map_opt) { + pr_debug("ERROR: Invalid map config: %s\n", map_name); + goto out; + } + + *map_opt++ = '\0'; /* terminate the map name; map_opt now points at the option inside the same buffer */ + if (*map_opt == '\0') { + pr_debug("ERROR: Invalid map option: %s\n", term->config); + goto out; + } + + map = bpf_object__get_map_by_name(obj, map_name); + if (!map) { + pr_debug("ERROR: Map %s doesn't exist\n", map_name); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; + goto out; + } + + *key_scan_pos += strlen(map_opt); /* stays advanced if the range check below fails */ + err = config_map_indices_range_check(term, map, map_name); + if (err) + goto out; + *key_scan_pos -= strlen(map_opt); /* range check passed: restore the position */ + + for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { + struct bpf_obj_config__map_func *func = + &bpf_obj_config__map_funcs[i]; + + if (strcmp(map_opt, func->config_opt) == 0) { + err = func->config_func(map, term, evlist); + goto out; + } + } + + pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; +out: + if (!err) + *key_scan_pos += strlen(map_opt); /* success: advance the caller's scan position (err is 0 only after map_opt was set) */ + free(map_name); /* must come last: map_opt points into this buffer */ + return err; +} + +int bpf__config_obj(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos) +{ + int key_scan_pos = 0; + int err; + + if (!obj || !term || !term->config) + return -EINVAL; + + if (!prefixcmp(term->config, "map:")) { + key_scan_pos = sizeof("map:") - 1; + err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); + goto out; + } + err = 
-BPF_LOADER_ERRNO__OBJCONF_OPT; +out: + if (error_pos) + *error_pos = key_scan_pos; + return err; + +} + +typedef int (*map_config_func_t)(const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op, + void *pkey, void *arg); + +static int +foreach_key_array_all(map_config_func_t func, + void *arg, const char *name, + int map_fd, struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i; + int err; + + for (i = 0; i < pdef->max_entries; i++) { + err = func(name, map_fd, pdef, op, &i, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, i); + return err; + } + } + return 0; +} + +static int +foreach_key_array_ranges(map_config_func_t func, void *arg, + const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i, j; + int err; + + for (i = 0; i < op->k.array.nr_ranges; i++) { + unsigned int start = op->k.array.ranges[i].start; + size_t length = op->k.array.ranges[i].length; + + for (j = 0; j < length; j++) { + unsigned int idx = start + j; + + err = func(name, map_fd, pdef, op, &idx, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, idx); + return err; + } + } + } + return 0; +} + +static int +bpf_map_config_foreach_key(struct bpf_map *map, + map_config_func_t func, + void *arg) +{ + int err, map_fd; + const char *name; + struct bpf_map_op *op; + struct bpf_map_def def; + struct bpf_map_priv *priv; + + name = bpf_map__get_name(map); + + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("ERROR: failed to get private from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + if (!priv || list_empty(&priv->ops_list)) { + pr_debug("INFO: nothing to config for map %s\n", name); + return 0; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: failed to get definition from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + map_fd = bpf_map__get_fd(map); + if (map_fd < 0) { 
+ pr_debug("ERROR: failed to get fd from map %s\n", name); + return map_fd; + } + + list_for_each_entry(op, &priv->ops_list, list) { + switch (def.type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + switch (op->key_type) { + case BPF_MAP_KEY_ALL: + err = foreach_key_array_all(func, arg, name, + map_fd, &def, op); + break; + case BPF_MAP_KEY_RANGES: + err = foreach_key_array_ranges(func, arg, name, + map_fd, &def, + op); + break; + default: + pr_debug("ERROR: keytype for map '%s' invalid\n", + name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + if (err) + return err; + break; + default: + pr_debug("ERROR: type of '%s' incorrect\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + } + + return 0; +} + +static int +apply_config_value_for_key(int map_fd, void *pkey, + size_t val_size, u64 val) +{ + int err = 0; + + switch (val_size) { + case 1: { + u8 _val = (u8)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 2: { + u16 _val = (u16)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 4: { + u32 _val = (u32)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 8: { + err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY); + break; + } + default: + pr_debug("ERROR: invalid value size\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + if (err && errno) + err = -errno; + return err; +} + +static int +apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, + struct perf_evsel *evsel) +{ + struct xyarray *xy = evsel->fd; + struct perf_event_attr *attr; + unsigned int key, events; + bool check_pass = false; + int *evt_fd; + int err; + + if (!xy) { + pr_debug("ERROR: evsel not ready for map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (xy->row_size / xy->entry_size != 1) { + pr_debug("ERROR: Dimension of target event is incorrect for map %s\n", + name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM; + 
} + + attr = &evsel->attr; + if (attr->inherit) { + pr_debug("ERROR: Can't put inherit event into map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; + } + + if (perf_evsel__is_bpf_output(evsel)) + check_pass = true; + if (attr->type == PERF_TYPE_RAW) + check_pass = true; + if (attr->type == PERF_TYPE_HARDWARE) + check_pass = true; + if (!check_pass) { + pr_debug("ERROR: Event type is wrong for map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; + } + + events = xy->entries / (xy->row_size / xy->entry_size); + key = *((unsigned int *)pkey); + if (key >= events) { + pr_debug("ERROR: there is no event %d for map %s\n", + key, name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE; + } + evt_fd = xyarray__entry(xy, key, 0); + err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY); + if (err && errno) + err = -errno; + return err; +} + +static int +apply_obj_config_map_for_key(const char *name, int map_fd, + struct bpf_map_def *pdef __maybe_unused, + struct bpf_map_op *op, + void *pkey, void *arg __maybe_unused) +{ + int err; + + switch (op->op_type) { + case BPF_MAP_OP_SET_VALUE: + err = apply_config_value_for_key(map_fd, pkey, + pdef->value_size, + op->v.value); + break; + case BPF_MAP_OP_SET_EVSEL: + err = apply_config_evsel_for_key(name, map_fd, pkey, + op->v.evsel); + break; + default: + pr_debug("ERROR: unknown value type for '%s'\n", name); + err = -BPF_LOADER_ERRNO__INTERNAL; + } + return err; +} + +static int +apply_obj_config_map(struct bpf_map *map) +{ + return bpf_map_config_foreach_key(map, + apply_obj_config_map_for_key, + NULL); +} + +static int +apply_obj_config_object(struct bpf_object *obj) +{ + struct bpf_map *map; + int err; + + bpf_map__for_each(map, obj) { + err = apply_obj_config_map(map); + if (err) + return err; + } + return 0; +} + +int bpf__apply_obj_config(void) +{ + struct bpf_object *obj, *tmp; + int err; + + bpf_object__for_each_safe(obj, tmp) { + err = apply_obj_config_object(obj); + if (err) + return err; 
+ } + + return 0; +} + #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) #define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) @@ -753,6 +1431,20 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue", [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program", [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue", + [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option", + [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')", + [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option", + [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", + [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", + [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting", + [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large", }; static int @@ -872,3 +1564,29 @@ int bpf__strerror_load(struct bpf_object *obj, bpf__strerror_end(buf, size); return 0; } + +int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, int err, + char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, + "Can't use this config term with this map type"); + 
bpf__strerror_end(buf, size); + return 0; +} + +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, + "Cannot set event to BPF map in multi-thread tracing"); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, + "%s (Hint: use -i to turn off inherit)", emsg); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, + "Can only put raw, hardware and BPF output event into a BPF map"); + bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 6fdc0457e2b6..be4311944e3d 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -10,6 +10,7 @@ #include <string.h> #include <bpf/libbpf.h> #include "probe-event.h" +#include "evlist.h" #include "debug.h" enum bpf_loader_errno { @@ -24,10 +25,25 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */ BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */ BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */ + BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */ + BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */ + BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ + BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support 
inherit event */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ __BPF_LOADER_ERRNO__END, }; struct bpf_object; +struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, @@ -53,6 +69,16 @@ int bpf__strerror_load(struct bpf_object *obj, int err, char *buf, size_t size); int bpf__foreach_tev(struct bpf_object *obj, bpf_prog_iter_callback_t func, void *arg); + +int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, + struct perf_evlist *evlist, int *error_pos); +int bpf__strerror_config_obj(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos, int err, char *buf, + size_t size); +int bpf__apply_obj_config(void); +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -84,6 +110,21 @@ bpf__foreach_tev(struct bpf_object *obj __maybe_unused, } static inline int +bpf__config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused) +{ + return 0; +} + +static inline int +bpf__apply_obj_config(void) +{ + return 0; +} + +static inline int __bpf_strerror(char *buf, size_t size) { if (!size) @@ -118,5 +159,23 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} + +static inline int +bpf__strerror_apply_obj_config(int err __maybe_unused, + char *buf, 
size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6a7e273a514a..0573c2ec861d 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -28,7 +28,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -38,7 +37,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) al.map->dso->hit = 1; @@ -166,6 +165,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return build_id__filename(build_id_hex, bf, size); } +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) +{ + char *id_name, *ch; + struct stat sb; + + id_name = dso__build_id_filename(dso, bf, size); + if (!id_name) + goto err; + if (access(id_name, F_OK)) + goto err; + if (lstat(id_name, &sb) == -1) + goto err; + if ((size_t)sb.st_size > size - 1) + goto err; + if (readlink(id_name, bf, size - 1) < 0) + goto err; + + bf[sb.st_size] = '\0'; + + /* + * link should be: + * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 + */ + ch = strrchr(bf, '/'); + if (!ch) + goto err; + if (ch - 3 < bf) + goto err; + + return strncmp(".ko", ch - 3, 3) == 0; +err: + /* + * If dso__build_id_filename work, get id_name again, + * because id_name points to bf and is broken. + */ + if (id_name) + id_name = dso__build_id_filename(dso, bf, size); + pr_err("Invalid build id: %s\n", id_name ? : + dso->long_name ? : + dso->short_name ? 
: + "[unknown]"); + return false; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -211,6 +254,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) dsos__for_each_with_build_id(pos, &machine->dsos.head) { const char *name; size_t name_len; + bool in_kernel = false; if (!pos->hit) continue; @@ -227,8 +271,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd) name_len = pos->long_name_len + 1; } + in_kernel = pos->kernel || + is_kernel_module(name, + PERF_RECORD_MISC_CPUMODE_UNKNOWN); err = write_buildid(name, name_len, pos->build_id, machine->pid, - pos->kernel ? kmisc : umisc, fd); + in_kernel ? kmisc : umisc, fd); if (err) break; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 27a14a8a945b..64af3e20610d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 07b5d63947b1..1f5a93c2c9a2 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -23,14 +23,17 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" +extern const char *config_exclusive_filename; + typedef int (*config_fn_t)(const char *, const char *, void *); -extern int perf_default_config(const char *, const char *, void *); -extern int perf_config(config_fn_t fn, void *); -extern int perf_config_int(const char *, const char *); -extern u64 perf_config_u64(const char *, const char 
*); -extern int perf_config_bool(const char *, const char *); -extern int config_error_nonbool(const char *); -extern const char *perf_config_dirname(const char *, const char *); +int perf_default_config(const char *, const char *, void *); +int perf_config(config_fn_t fn, void *); +int perf_config_int(const char *, const char *); +u64 perf_config_u64(const char *, const char *); +int perf_config_bool(const char *, const char *); +int config_error_nonbool(const char *); +const char *perf_config_dirname(const char *, const char *); +const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); @@ -61,13 +64,9 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -const char *make_nonrelative_path(const char *path); char *strip_path_suffix(const char *path, const char *suffix); -extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); - -extern char *perf_pathdup(const char *fmt, ...) - __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *perf_path(const char *fmt, ...) 
__attribute__((format (printf, 1, 2))); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 53c43eb9489e..24b4bd0d7754 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -416,7 +416,7 @@ create_child(struct callchain_node *parent, bool inherit_children) /* * Fill the node with callchain values */ -static void +static int fill_node(struct callchain_node *node, struct callchain_cursor *cursor) { struct callchain_cursor_node *cursor_node; @@ -433,7 +433,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call = zalloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); - return; + return -1; } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; @@ -443,6 +443,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) callchain_cursor_advance(cursor); cursor_node = callchain_cursor_current(cursor); } + return 0; } static struct callchain_node * @@ -453,7 +454,19 @@ add_child(struct callchain_node *parent, struct callchain_node *new; new = create_child(parent, false); - fill_node(new, cursor); + if (new == NULL) + return NULL; + + if (fill_node(new, cursor) < 0) { + struct callchain_list *call, *tmp; + + list_for_each_entry_safe(call, tmp, &new->val, list) { + list_del(&call->list); + free(call); + } + free(new); + return NULL; + } new->children_hit = 0; new->hit = period; @@ -462,16 +475,32 @@ add_child(struct callchain_node *parent, return new; } -static s64 match_chain(struct callchain_cursor_node *node, - struct callchain_list *cnode) +enum match_result { + MATCH_ERROR = -1, + MATCH_EQ, + MATCH_LT, + MATCH_GT, +}; + +static enum match_result match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) { struct symbol *sym = node->sym; + u64 left, right; if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) - return cnode->ms.sym->start - sym->start; - else - return cnode->ip 
- node->ip; + callchain_param.key == CCKEY_FUNCTION) { + left = cnode->ms.sym->start; + right = sym->start; + } else { + left = cnode->ip; + right = node->ip; + } + + if (left == right) + return MATCH_EQ; + + return left > right ? MATCH_GT : MATCH_LT; } /* @@ -479,7 +508,7 @@ static s64 match_chain(struct callchain_cursor_node *node, * give a part of its callchain to the created child. * Then create another child to host the given callchain of new branch */ -static void +static int split_add_child(struct callchain_node *parent, struct callchain_cursor *cursor, struct callchain_list *to_split, @@ -491,6 +520,8 @@ split_add_child(struct callchain_node *parent, /* split */ new = create_child(parent, true); + if (new == NULL) + return -1; /* split the callchain and move a part to the new child */ old_tail = parent->val.prev; @@ -524,6 +555,8 @@ split_add_child(struct callchain_node *parent, node = callchain_cursor_current(cursor); new = add_child(parent, cursor, period); + if (new == NULL) + return -1; /* * This is second child since we moved parent's children @@ -534,7 +567,7 @@ split_add_child(struct callchain_node *parent, cnode = list_first_entry(&first->val, struct callchain_list, list); - if (match_chain(node, cnode) < 0) + if (match_chain(node, cnode) == MATCH_LT) pp = &p->rb_left; else pp = &p->rb_right; @@ -545,14 +578,15 @@ split_add_child(struct callchain_node *parent, parent->hit = period; parent->count = 1; } + return 0; } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period); -static void +static int append_chain_children(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -564,36 +598,42 @@ append_chain_children(struct callchain_node *root, node = callchain_cursor_current(cursor); if (!node) - return; + return -1; /* lookup in childrens */ while (*p) { - s64 ret; + enum match_result ret; parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node_in); 
/* If at least first entry matches, rely to children */ ret = append_chain(rnode, cursor, period); - if (ret == 0) + if (ret == MATCH_EQ) goto inc_children_hit; + if (ret == MATCH_ERROR) + return -1; - if (ret < 0) + if (ret == MATCH_LT) p = &parent->rb_left; else p = &parent->rb_right; } /* nothing in children, add to the current node */ rnode = add_child(root, cursor, period); + if (rnode == NULL) + return -1; + rb_link_node(&rnode->rb_node_in, parent, p); rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); inc_children_hit: root->children_hit += period; root->children_count++; + return 0; } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -602,7 +642,7 @@ append_chain(struct callchain_node *root, u64 start = cursor->pos; bool found = false; u64 matches; - int cmp = 0; + enum match_result cmp = MATCH_ERROR; /* * Lookup in the current node @@ -618,7 +658,7 @@ append_chain(struct callchain_node *root, break; cmp = match_chain(node, cnode); - if (cmp) + if (cmp != MATCH_EQ) break; found = true; @@ -628,7 +668,7 @@ append_chain(struct callchain_node *root, /* matches not, relay no the parent */ if (!found) { - WARN_ONCE(!cmp, "Chain comparison error\n"); + WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n"); return cmp; } @@ -636,21 +676,25 @@ append_chain(struct callchain_node *root, /* we match only a part of the node. 
Split it and add the new chain */ if (matches < root->val_nr) { - split_add_child(root, cursor, cnode, start, matches, period); - return 0; + if (split_add_child(root, cursor, cnode, start, matches, + period) < 0) + return MATCH_ERROR; + + return MATCH_EQ; } /* we match 100% of the path, increment the hit */ if (matches == root->val_nr && cursor->pos == cursor->nr) { root->hit += period; root->count++; - return 0; + return MATCH_EQ; } /* We match the node and still have a part remaining */ - append_chain_children(root, cursor, period); + if (append_chain_children(root, cursor, period) < 0) + return MATCH_ERROR; - return 0; + return MATCH_EQ; } int callchain_append(struct callchain_root *root, @@ -662,7 +706,8 @@ int callchain_append(struct callchain_root *root, callchain_cursor_commit(cursor); - append_chain_children(&root->node, cursor, period); + if (append_chain_children(&root->node, cursor, period) < 0) + return -1; if (cursor->nr > root->max_depth) root->max_depth = cursor->nr; @@ -690,7 +735,8 @@ merge_chain_branch(struct callchain_cursor *cursor, if (src->hit) { callchain_cursor_commit(cursor); - append_chain_children(dst, cursor, src->hit); + if (append_chain_children(dst, cursor, src->hit) < 0) + return -1; } n = rb_first(&src->rb_root_in); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 18dd22269764..d2a9e694810c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -220,7 +220,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record(const char *arg, struct callchain_param *param); int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); int parse_callchain_top_opt(const char *arg); @@ -236,7 +236,7 @@ static inline void 
callchain_cursor_snapshot(struct callchain_cursor *dest, } #ifdef HAVE_SKIP_CALLCHAIN_IDX -extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); +int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); #else static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, struct ip_callchain *chain __maybe_unused) diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index b4b8cb42fe5e..31f8dcdbd7ef 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -13,7 +13,7 @@ struct cgroup_sel { extern int nr_cgroups; /* number of explicit cgroups defined */ -extern void close_cgroup(struct cgroup_sel *cgrp); -extern int parse_cgroups(const struct option *opt, const char *str, int unset); +void close_cgroup(struct cgroup_sel *cgrp); +int parse_cgroups(const struct option *opt, const char *str, int unset); #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 3bee6773ddb0..d0d465953d36 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -5,7 +5,7 @@ unsigned long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -extern int sched_getcpu(void) __THROW; +int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e5fb88bab9e1..43e84aa27e4a 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) return 0; } -int perf_color_default_config(const char *var, const char *value, void *cb) +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "color.ui")) { perf_use_color_default = perf_config_colorbool(var, value, -1); return 0; } - return perf_default_config(var, value, cb); + return 0; } static int __color_vsnprintf(char *bf, size_t size, 
const char *color, diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index d3e12e30e1d5..4e727635476e 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -26,7 +26,7 @@ static const char *config_file_name; static int config_linenr; static int config_file_eof; -static const char *config_exclusive_filename; +const char *config_exclusive_filename; static int get_next_char(void) { @@ -434,7 +434,7 @@ static int perf_config_from_file(config_fn_t fn, const char *filename, void *dat return ret; } -static const char *perf_etc_perfconfig(void) +const char *perf_etc_perfconfig(void) { static const char *system_wide; if (!system_wide) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index fa935093a599..9bcf2bed3a6d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -8,6 +8,10 @@ #include <linux/bitmap.h> #include "asm/bug.h" +static int max_cpu_num; +static int max_node_num; +static int *cpunode_map; + static struct cpu_map *cpu_map__default_new(void) { struct cpu_map *cpus; @@ -486,6 +490,32 @@ out: pr_err("Failed to read max nodes, using default of %d\n", max_node_num); } +int cpu__max_node(void) +{ + if (unlikely(!max_node_num)) + set_max_node_num(); + + return max_node_num; +} + +int cpu__max_cpu(void) +{ + if (unlikely(!max_cpu_num)) + set_max_cpu_num(); + + return max_cpu_num; +} + +int cpu__get_node(int cpu) +{ + if (unlikely(cpunode_map == NULL)) { + pr_debug("cpu_map not initialized\n"); + return -1; + } + + return cpunode_map[cpu]; +} + static int init_cpunode_map(void) { int i; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 71c41b9efabb..81a2562aaa2b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -57,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map) return map ? 
map->map[0] == -1 : true; } -int max_cpu_num; -int max_node_num; -int *cpunode_map; - int cpu__setup_cpunode_map(void); -static inline int cpu__max_node(void) -{ - if (unlikely(!max_node_num)) - pr_debug("cpu_map not initialized\n"); - - return max_node_num; -} - -static inline int cpu__max_cpu(void) -{ - if (unlikely(!max_cpu_num)) - pr_debug("cpu_map not initialized\n"); - - return max_cpu_num; -} - -static inline int cpu__get_node(int cpu) -{ - if (unlikely(cpunode_map == NULL)) { - pr_debug("cpu_map not initialized\n"); - return -1; - } - - return cpunode_map[cpu]; -} +int cpu__max_node(void); +int cpu__max_cpu(void); +int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index aada3ac5e891..d4a5a21c2a7e 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -32,8 +32,17 @@ unsigned char sane_ctype[256] = { const char *graph_line = "_____________________________________________________________________" + "_____________________________________________________________________" "_____________________________________________________________________"; const char *graph_dotted_line = "---------------------------------------------------------------------" "---------------------------------------------------------------------" "---------------------------------------------------------------------"; +const char *spaces = + " " + " " + " "; +const char *dots = + "....................................................................." + "....................................................................." 
+ "....................................................................."; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 34cd1e4039d3..bbf69d248ec5 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw, return ret; } +static int +add_bpf_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int raw_size = sample->raw_size; + unsigned int nr_elements = raw_size / sizeof(u32); + unsigned int i; + int ret; + + if (nr_elements * sizeof(u32) != raw_size) + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + raw_size, nr_elements * sizeof(u32) - raw_size); + + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'raw_len' for bpf output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for raw_len\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); + if (ret) { + pr_err("failed to set payload to raw_len\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); + seq_field = bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'raw_data' for bpf output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'raw_data'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = 
bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u32 *)(sample->raw_data))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set raw_data[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -554,7 +632,7 @@ static bool is_flush_needed(struct ctf_stream *cs) } static int process_sample_event(struct perf_tool *tool, - union perf_event *_event __maybe_unused, + union perf_event *_event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused) @@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_values(event_class, event, sample); + if (ret) + return -1; + } + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); if (cs) { if (is_flush_needed(cs)) @@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw, return ret; } +static int add_bpf_output_types(struct ctf_writer *cw, + struct bt_ctf_event_class *class) +{ + struct bt_ctf_field_type *len_type = cw->data.u32; + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; + struct bt_ctf_field_type *seq_type; + int ret; + + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); + if (ret) + return ret; + + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); + if (!seq_type) + return -1; + + return bt_ctf_event_class_add_field(class, seq_type, "raw_data"); +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class 
*event_class) { @@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, * ctf event header * PERF_SAMPLE_READ - TODO * PERF_SAMPLE_CALLCHAIN - TODO - * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_RAW - tracepoint fields and BPF output + * are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO * PERF_SAMPLE_STACK_USER - TODO @@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) goto err; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_types(cw, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); @@ -858,6 +968,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) return 0; } +static void cleanup_events(struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + struct evsel_priv *priv; + + priv = evsel->priv; + bt_ctf_event_class_put(priv->event_class); + zfree(&evsel->priv); + } + + perf_evlist__delete(evlist); + session->evlist = NULL; +} + static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; @@ -953,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; +#if __BYTE_ORDER == __BIG_ENDIAN + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); +#else + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); +#endif + pr2("Created type: INTEGER %d-bit %ssigned %s\n", size, sign ? "un" : "", hex ? 
"hex" : ""); return type; @@ -1100,7 +1233,7 @@ static int convert__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int bt_convert__perf2ctf(const char *input, const char *path, bool force) @@ -1171,6 +1304,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) (double) c.events_size / 1024.0 / 1024.0, c.events_count); + cleanup_events(session); perf_session__delete(session); ctf_writer__cleanup(cw); diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 1c9689e4cc17..049438d51b9a 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -333,7 +333,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, sample_addr_correlates_sym(&evsel->attr)) { struct addr_location addr_al; - perf_event__preprocess_sample_addr(event, sample, thread, &addr_al); + thread__resolve(thread, &addr_al, sample); err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, &es.addr_sym_db_id, &es.addr_offset); if (err) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 86d9c7302598..8c4212abd19b 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -5,6 +5,7 @@ #include <string.h> #include <stdarg.h> #include <stdio.h> +#include <api/debug.h> #include "cache.h" #include "color.h" @@ -22,7 +23,7 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static int _eprintf(int level, int var, const char *fmt, va_list args) +int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; @@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } -int veprintf(int level, int var, const char *fmt, va_list args) -{ - return _eprintf(level, var, fmt, args); -} - int eprintf(int level, int var, const char *fmt, ...) 
{ va_list args; int ret; va_start(args, fmt); - ret = _eprintf(level, var, fmt, args); + ret = veprintf(level, var, fmt, args); va_end(args); return ret; } -static int __eprintf_time(u64 t, const char *fmt, va_list args) +static int veprintf_time(u64 t, const char *fmt, va_list args) { int ret = 0; u64 secs, usecs, nsecs = t; @@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) if (var >= level) { va_start(args, fmt); - ret = __eprintf_time(t, fmt, args); + ret = veprintf_time(t, fmt, args); va_end(args); } @@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...) va_list args; va_start(args, fmt); - _eprintf(1, verbose, fmt, args); + veprintf(1, verbose, fmt, args); va_end(args); eprintf(1, verbose, "\n"); } @@ -110,40 +106,61 @@ int dump_printf(const char *fmt, ...) return ret; } +static void trace_event_printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + const char *color = PERF_COLOR_BLUE; + union perf_event *event = (union perf_event *)extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("."); + color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", + event->header.size); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("."); + break; + case BINARY_PRINT_ADDR: + color_fprintf(stdout, color, " %04x: ", val); + break; + case BINARY_PRINT_NUM_DATA: + color_fprintf(stdout, color, " %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_SEP: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_CHAR_DATA: + color_fprintf(stdout, color, "%c", + isprint(ch) ? 
ch : '.'); + break; + case BINARY_PRINT_CHAR_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_LINE_END: + color_fprintf(stdout, color, "\n"); + break; + case BINARY_PRINT_DATA_END: + printf("\n"); + break; + default: + break; + } +} + void trace_event(union perf_event *event) { unsigned char *raw_event = (void *)event; - const char *color = PERF_COLOR_BLUE; - int i, j; if (!dump_trace) return; - printf("."); - color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", - event->header.size); - - for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) { - printf("."); - color_fprintf(stdout, color, " %04x: ", i); - } - - color_fprintf(stdout, color, " %02x", raw_event[i]); - - if (((i & 15) == 15) || i == event->header.size-1) { - color_fprintf(stdout, color, " "); - for (j = 0; j < 15-(i & 15); j++) - color_fprintf(stdout, color, " "); - for (j = i & ~15; j <= i; j++) { - color_fprintf(stdout, color, "%c", - isprint(raw_event[j]) ? - raw_event[j] : '.'); - } - color_fprintf(stdout, color, "\n"); - } - } - printf(".\n"); + print_binary(raw_event, event->header.size, 16, + trace_event_printer, event); } static struct debug_variable { @@ -192,3 +209,23 @@ int perf_debug_option(const char *str) free(s); return 0; } + +#define DEBUG_WRAPPER(__n, __l) \ +static int pr_ ## __n ## _wrapper(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(__l, verbose, fmt, args); \ + va_end(args); \ + return ret; \ +} + +DEBUG_WRAPPER(warning, 0); +DEBUG_WRAPPER(debug, 1); + +void perf_debug_setup(void) +{ + libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8b9a088c32ab..14bafda79eda 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) 
__attribute__( int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void perf_debug_setup(void); #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c new file mode 100644 index 000000000000..3e6062ab2cdd --- /dev/null +++ b/tools/perf/util/demangle-java.c @@ -0,0 +1,199 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "debug.h" +#include "symbol.h" + +#include "demangle-java.h" + +enum { + MODE_PREFIX = 0, + MODE_CLASS = 1, + MODE_FUNC = 2, + MODE_TYPE = 3, + MODE_CTYPE = 3, /* class arg */ +}; + +#define BASE_ENT(c, n) [c - 'A']=n +static const char *base_types['Z' - 'A' + 1] = { + BASE_ENT('B', "byte" ), + BASE_ENT('C', "char" ), + BASE_ENT('D', "double" ), + BASE_ENT('F', "float" ), + BASE_ENT('I', "int" ), + BASE_ENT('J', "long" ), + BASE_ENT('S', "short" ), + BASE_ENT('Z', "bool" ), +}; + +/* + * demangle Java symbol between str and end positions and stores + * up to maxlen characters into buf. The parser starts in mode. 
+ * + * Use MODE_PREFIX to process entire prototype till end position + * Use MODE_TYPE to process return type if str starts on return type char + * + * Return: + * success: buf + * error : NULL + */ +static char * +__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode) +{ + int rlen = 0; + int array = 0; + int narg = 0; + const char *q; + + if (!end) + end = str + strlen(str); + + for (q = str; q != end; q++) { + + if (rlen == (maxlen - 1)) + break; + + switch (*q) { + case 'L': + if (mode == MODE_PREFIX || mode == MODE_CTYPE) { + if (mode == MODE_CTYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + narg++; + } + rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); + if (mode == MODE_PREFIX) + mode = MODE_CLASS; + } else + buf[rlen++] = *q; + break; + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + if (mode == MODE_TYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + narg++; + } else + buf[rlen++] = *q; + break; + case 'V': + if (mode == MODE_TYPE) { + rlen += scnprintf(buf + rlen, maxlen - rlen, "void"); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + } else + buf[rlen++] = *q; + break; + case '[': + if (mode != MODE_TYPE) + goto error; + array++; + break; + case '(': + if (mode != MODE_FUNC) + goto error; + buf[rlen++] = *q; + mode = MODE_TYPE; + break; + case ')': + if (mode != MODE_TYPE) + goto error; + buf[rlen++] = *q; + narg = 0; + break; + case ';': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + /* safe because at least one other char to process */ + if (isalpha(*(q + 1))) + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + if (mode == MODE_CLASS) + mode = MODE_FUNC; + else if (mode == MODE_CTYPE) + mode = 
MODE_TYPE; + break; + case '/': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + break; + default : + buf[rlen++] = *q; + } + } + buf[rlen] = '\0'; + return buf; +error: + return NULL; +} + +/* + * Demangle Java function signature (openJDK, not GCJ) + * input: + * str: string to parse. String is not modified + * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * return: + * if input can be demangled, then a newly allocated string is returned. + * if input cannot be demangled, then NULL is returned + * + * Note: caller is responsible for freeing demangled string + */ +char * +java_demangle_sym(const char *str, int flags) +{ + char *buf, *ptr; + char *p; + size_t len, l1 = 0; + + if (!str) + return NULL; + + /* find start of retunr type */ + p = strrchr(str, ')'); + if (!p) + return NULL; + + /* + * expansion factor estimated to 3x + */ + len = strlen(str) * 3 + 1; + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + if (!(flags & JAVA_DEMANGLE_NORET)) { + /* + * get return type first + */ + ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE); + if (!ptr) + goto error; + + /* add space between return type and function prototype */ + l1 = strlen(buf); + buf[l1++] = ' '; + } + + /* process function up to return type */ + ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX); + if (!ptr) + goto error; + + return buf; +error: + free(buf); + return NULL; +} diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h new file mode 100644 index 000000000000..a981c1f968fe --- /dev/null +++ b/tools/perf/util/demangle-java.h @@ -0,0 +1,10 @@ +#ifndef __PERF_DEMANGLE_JAVA +#define __PERF_DEMANGLE_JAVA 1 +/* + * demangle function flags + */ +#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */ + +char * java_demangle_sym(const char *str, int flags); + +#endif /* __PERF_DEMANGLE_JAVA */ diff --git a/tools/perf/util/dso.c 
b/tools/perf/util/dso.c index e8e9a9dbf5e3..8e6395439ca0 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; + + ret = -1; + if (!is_regular_file(filename)) + break; + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 45ec4d0a50ed..0953280629cf 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -162,6 +162,7 @@ struct dso { u8 loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; + u64 text_offset; const char *short_name; const char *long_name; u16 long_name_len; @@ -301,7 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, * TODO */ int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso __maybe_unused); +void dso__data_put_fd(struct dso *dso); void dso__data_close(struct dso *dso); off_t dso__data_size(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index a509aa8433a1..577e600c8eb1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -915,7 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ - strbuf_addf(buf, "(function_type)"); + strbuf_add(buf, "(function_type)", 15); return 0; } else { if (!dwarf_diename(&type)) @@ -932,7 +932,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) } ret = die_get_typename(&type, buf); if (ret == 0) - strbuf_addf(buf, "%s", tmp); + strbuf_addstr(buf, tmp); return ret; } @@ -951,7 +951,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - strbuf_addf(buf, "(unknown_type)"); + strbuf_add(buf, " (unknown_type)", 14); } 
strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); @@ -1013,7 +1013,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, } if (!first) - strbuf_addf(buf, "]>"); + strbuf_add(buf, "]>", 2); out: free(scopes); @@ -1076,7 +1076,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) } if (!first) - strbuf_addf(buf, "]>"); + strbuf_add(buf, "]>", 2); return ret; } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index c42ec366f2a7..dc0ce1adb075 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -25,48 +25,48 @@ #include <elfutils/version.h> /* Find the realpath of the target file */ -extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); +const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); /* Get DW_AT_comp_dir (should be NULL with older gcc) */ -extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); +const char *cu_get_comp_dir(Dwarf_Die *cu_die); /* Get a line number and file name for given address */ -extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, - const char **fname, int *lineno); +int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, + const char **fname, int *lineno); /* Walk on funcitons at given address */ -extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, - int (*callback)(Dwarf_Die *, void *), void *data); +int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, + int (*callback)(Dwarf_Die *, void *), void *data); /* Ensure that this DIE is a subprogram and definition (not declaration) */ -extern bool die_is_func_def(Dwarf_Die *dw_die); +bool die_is_func_def(Dwarf_Die *dw_die); /* Ensure that this DIE is an instance of a subprogram */ -extern bool die_is_func_instance(Dwarf_Die *dw_die); +bool die_is_func_instance(Dwarf_Die *dw_die); /* Compare diename and tname */ -extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); +bool die_compare_name(Dwarf_Die *dw_die, 
const char *tname); /* Matching diename with glob pattern */ -extern bool die_match_name(Dwarf_Die *dw_die, const char *glob); +bool die_match_name(Dwarf_Die *dw_die, const char *glob); /* Get callsite line number of inline-function instance */ -extern int die_get_call_lineno(Dwarf_Die *in_die); +int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ -extern const char *die_get_call_file(Dwarf_Die *in_die); +const char *die_get_call_file(Dwarf_Die *in_die); /* Get type die */ -extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Get a type die, but skip qualifiers and typedef */ -extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Check whether the DIE is signed or not */ -extern bool die_is_signed_type(Dwarf_Die *tp_die); +bool die_is_signed_type(Dwarf_Die *tp_die); /* Get data_member_location offset */ -extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); +int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); /* Return values for die_find_child() callbacks */ enum { @@ -77,29 +77,29 @@ enum { }; /* Search child DIEs */ -extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, - int (*callback)(Dwarf_Die *, void *), - void *data, Dwarf_Die *die_mem); +Dwarf_Die *die_find_child(Dwarf_Die *rt_die, + int (*callback)(Dwarf_Die *, void *), + void *data, Dwarf_Die *die_mem); /* Search a non-inlined function including given address */ -extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search a non-inlined function with tail call at given address */ Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem); /* Search the top inlined function 
including given address */ -extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search the deepest inlined function including given address */ -extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Walk on the instances of given DIE */ -extern int die_walk_instances(Dwarf_Die *in_die, - int (*callback)(Dwarf_Die *, void *), void *data); +int die_walk_instances(Dwarf_Die *in_die, + int (*callback)(Dwarf_Die *, void *), void *data); /* Walker on lines (Note: line number will not be sorted) */ typedef int (* line_walk_callback_t) (const char *fname, int lineno, @@ -109,22 +109,20 @@ typedef int (* line_walk_callback_t) (const char *fname, int lineno, * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on * the lines inside the subprogram, otherwise the DIE must be a CU DIE. 
*/ -extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, - void *data); +int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data); /* Find a variable called 'name' at given address */ -extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, - Dwarf_Addr addr, Dwarf_Die *die_mem); +Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, + Dwarf_Addr addr, Dwarf_Die *die_mem); /* Find a member called 'name' */ -extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, + Dwarf_Die *die_mem); /* Get the name of given variable DIE */ -extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ -extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, - struct strbuf *buf); +int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); #endif diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 7dd5939dea2e..49a11d9d8b8f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,8 @@ struct perf_env perf_env; void perf_env__exit(struct perf_env *env) { + int i; + zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env) zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + + for (i = 0; i < env->caches_cnt; i++) + cpu_cache_level__free(&env->caches[i]); + zfree(&env->caches); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) @@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) env->nr_cpus_avail = nr_cpus; return 0; } + +void 
cpu_cache_level__free(struct cpu_cache_level *cache) +{ + free(cache->type); + free(cache->map); + free(cache->size); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0132b9557c02..56cffb60a0b4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,11 +1,23 @@ #ifndef __PERF_ENV_H #define __PERF_ENV_H +#include <linux/types.h> + struct cpu_topology_map { int socket_id; int core_id; }; +struct cpu_cache_level { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + char *type; + char *size; + char *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -31,6 +43,8 @@ struct perf_env { char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; + struct cpu_cache_level *caches; + int caches_cnt; }; extern struct perf_env perf_env; @@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); +void cpu_cache_level__free(struct cpu_cache_level *cache); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 85155e91b61b..dad55d04ffdd 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -56,13 +56,22 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } -static struct perf_sample synth_sample = { +static int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) +{ + struct perf_sample synth_sample = { .pid = -1, .tid = -1, .time = -1, .stream_id = -1, .cpu = -1, .period = 1, + .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, + }; + + return process(tool, event, &synth_sample, machine); }; /* @@ -186,7 +195,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) return -1; - if (process(tool, event, &synth_sample, machine) != 0) + if 
(perf_tool__process_synth_event(tool, event, machine, process) != 0) return -1; return tgid; @@ -218,7 +227,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); - if (process(tool, event, &synth_sample, machine) != 0) + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) return -1; return 0; @@ -282,7 +291,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, @@ -344,7 +353,7 @@ out: event->mmap2.pid = tgid; event->mmap2.tid = pid; - if (process(tool, event, &synth_sample, machine) != 0) { + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; } @@ -402,7 +411,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - if (process(tool, event, &synth_sample, machine) != 0) { + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; } @@ -472,7 +481,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, /* * Send the prepared comm event */ - if (process(tool, comm_event, &synth_sample, machine) != 0) + if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) break; rc = 0; @@ -701,7 +710,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; - err = process(tool, event, &synth_sample, machine); + err = perf_tool__process_synth_event(tool, event, machine, process); free(event); return err; @@ -1295,12 +1304,9 @@ void 
thread__find_addr_location(struct thread *thread, * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() */ -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample) +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -1315,11 +1321,11 @@ int perf_event__preprocess_sample(const union perf_event *event, * events, but for older perf.data files there was no such thing, so do * it now. */ - if (cpumode == PERF_RECORD_MISC_KERNEL && + if (sample->cpumode == PERF_RECORD_MISC_KERNEL && machine__kernel_map(machine) == NULL) machine__create_kernel_maps(machine); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? 
"[hypervisor]" : "<not found>"); @@ -1395,16 +1401,12 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) return false; } -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al) +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); if (!al->map) - thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE, sample->addr, al); al->cpu = sample->cpu; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b7ffb7ee9971..6bb1c928350d 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -192,6 +192,7 @@ struct perf_sample { u64 data_src; u32 flags; u16 insn_len; + u8 cpumode; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; @@ -597,10 +598,8 @@ int perf_event__process(struct perf_tool *tool, struct addr_location; -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); void addr_location__put(struct addr_location *al); @@ -608,10 +607,8 @@ struct thread; bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git 
a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d81f13de2476..86a03836a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1181,12 +1181,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, */ if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + evlist->cpus = cpu_map__get(cpus); } if (threads != evlist->threads) { thread_map__put(evlist->threads); - evlist->threads = threads; + evlist->threads = thread_map__get(threads); } perf_evlist__propagate_maps(evlist); @@ -1223,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) int err = 0; evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + err = perf_evsel__set_filter(evsel, filter); if (err) break; @@ -1624,7 +1627,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, +int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size) { int printed, value; @@ -1652,7 +1655,25 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" "Hint:\tThe current value is %d.", value); break; + case EINVAL: { + struct perf_evsel *first = perf_evlist__first(evlist); + int max_freq; + + if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) + goto out_default; + + if (first->attr.sample_freq < (u64)max_freq) + goto out_default; + + printed = scnprintf(buf, size, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" + "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", + emsg, max_freq, first->attr.sample_freq); + break; + } default: +out_default: scnprintf(buf, size, "%s", emsg); break; } @@ -1723,3 +1744,19 @@ void 
perf_evlist__set_tracking_event(struct perf_evlist *evlist, tracking_evsel->tracking = true; } + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, + const char *str) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (!evsel->name) + continue; + if (strcmp(str, evsel->name) == 0) + return evsel; + } + + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7c4d9a206776..a0d15221db6e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdbaf9b51e42..738ce226002b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -225,6 +225,11 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) if (evsel != NULL) perf_evsel__init(evsel, attr, idx); + if (perf_evsel__is_bpf_output(evsel)) { + evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_period = 1; + } + return evsel; } @@ -898,6 +903,16 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (evsel->precise_max) perf_event_attr__set_max_precise_ip(attr); + if (opts->all_user) { + attr->exclude_kernel = 1; + attr->exclude_user = 0; + } + + if (opts->all_kernel) { + attr->exclude_kernel = 0; + attr->exclude_user = 1; + } + /* * Apply event specific term settings, * it overloads any global configuration. 
@@ -1628,6 +1643,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->weight = 0; + data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -2362,12 +2378,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, case EPERM: case EACCES: return scnprintf(msg, size, - "You may not have permission to collect %sstats.\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" - " -1 - Not paranoid at all\n" - " 0 - Disallow raw tracepoint access for unpriv\n" - " 1 - Disallow cpu events for unpriv\n" - " 2 - Disallow kernel profiling for unpriv", + "You may not have permission to collect %sstats.\n\n" + "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" + "which controls use of the performance events system by\n" + "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "The default value is 1:\n\n" + " -1: Allow use of (almost) all events by all users\n" + ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" + ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", target->system_wide ? 
"system-wide " : ""); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8e75434bd01c..501ea6e565f1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -93,10 +93,8 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; - union { - void *priv; - u64 db_id; - }; + void *priv; + u64 db_id; struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; @@ -364,6 +362,14 @@ static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel) #undef FUNCTION_EVENT } +static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel) +{ + struct perf_event_attr *attr = &evsel->attr; + + return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) && + (attr->type == PERF_TYPE_SOFTWARE); +} + struct perf_attr_details { bool freq; bool verbose; diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c new file mode 100644 index 000000000000..c1ef805c6a8f --- /dev/null +++ b/tools/perf/util/genelf.c @@ -0,0 +1,449 @@ +/* + * genelf.c + * Copyright (C) 2014, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@gmail.com> + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define JVMTI + +#define BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef HAVE_LIBCRYPTO + +#define BUILD_ID_MD5 +#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ +#undef BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef BUILD_ID_SHA +#include <openssl/sha.h> +#endif + +#ifdef BUILD_ID_MD5 +#include <openssl/md5.h> +#endif +#endif + + +typedef struct { + unsigned int namesz; /* Size of entry's owner string */ + unsigned int descsz; /* Size of the note descriptor */ + unsigned int type; /* Interpretation of the descriptor */ + char name[0]; /* Start of the name+desc data */ +} Elf_Note; + +struct options { + char *output; + int fd; +}; + +static char shd_string_table[] = { + 0, + '.', 't', 'e', 'x', 't', 0, /* 1 */ + '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */ + '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */ + '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */ + '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ +}; + +static struct buildid_note { + Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ + char name[4]; /* GNU\0 */ + char build_id[20]; +} bnote; + +static Elf_Sym symtab[]={ + /* symbol 0 MUST be the undefined symbol */ + { .st_name = 0, /* index in sym_string table */ + .st_info = ELF_ST_TYPE(STT_NOTYPE), + .st_shndx = 0, /* for now */ + .st_value = 0x0, 
+ .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, + }, + { .st_name = 1, /* index in sym_string table */ + .st_info = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC), + .st_shndx = 1, + .st_value = 0, /* for now */ + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, /* for now */ + } +}; + +#ifdef BUILD_ID_URANDOM +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code __maybe_unused, + size_t csize __maybe_unused) +{ + int fd; + size_t sz = sizeof(note->build_id); + ssize_t sret; + + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) + err(1, "cannot access /dev/urandom for builid"); + + sret = read(fd, note->build_id, sz); + + close(fd); + + if (sret != (ssize_t)sz) + memset(note->build_id, 0, sz); +} +#endif + +#ifdef BUILD_ID_SHA +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code, + size_t csize) +{ + if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) + errx(1, "build_id too small for SHA1"); + + SHA1(code, csize, (unsigned char *)note->build_id); +} +#endif + +#ifdef BUILD_ID_MD5 +static void +gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) +{ + MD5_CTX context; + + if (sizeof(note->build_id) < 16) + errx(1, "build_id too small for MD5"); + + MD5_Init(&context); + MD5_Update(&context, &load_addr, sizeof(load_addr)); + MD5_Update(&context, code, csize); + MD5_Final((unsigned char *)note->build_id, &context); +} +#endif + +/* + * fd: file descriptor open for writing for the output file + * load_addr: code load address (could be zero, just used for buildid) + * sym: function name (for native code - used as the symbol) + * code: the native code + * csize: the code size in bytes + */ +int +jit_write_elf(int fd, uint64_t load_addr, const char *sym, + const void *code, int csize, + void *debug, int nr_debug_entries) +{ + Elf *e; + Elf_Data *d; + Elf_Scn *scn; + Elf_Ehdr *ehdr; + Elf_Shdr 
*shdr; + char *strsym = NULL; + int symlen; + int retval = -1; + + if (elf_version(EV_CURRENT) == EV_NONE) { + warnx("ELF initialization failed"); + return -1; + } + + e = elf_begin(fd, ELF_C_WRITE, NULL); + if (!e) { + warnx("elf_begin failed"); + goto error; + } + + /* + * setup ELF header + */ + ehdr = elf_newehdr(e); + if (!ehdr) { + warnx("cannot get ehdr"); + goto error; + } + + ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN; + ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS; + ehdr->e_machine = GEN_ELF_ARCH; + ehdr->e_type = ET_DYN; + ehdr->e_entry = GEN_ELF_TEXT_OFFSET; + ehdr->e_version = EV_CURRENT; + ehdr->e_shstrndx= 2; /* shdr index for section name */ + + /* + * setup text section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 16; + d->d_off = 0LL; + d->d_buf = (void *)code; + d->d_type = ELF_T_BYTE; + d->d_size = csize; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 1; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = GEN_ELF_TEXT_OFFSET; + shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + shdr->sh_entsize = 0; + + /* + * setup section headers string table + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = shd_string_table; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(shd_string_table); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup symtab section + */ + symtab[1].st_size = csize; + 
symtab[1].st_value = GEN_ELF_TEXT_OFFSET; + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 8; + d->d_off = 0LL; + d->d_buf = symtab; + d->d_type = ELF_T_SYM; + d->d_size = sizeof(symtab); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */ + shdr->sh_type = SHT_SYMTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = sizeof(Elf_Sym); + shdr->sh_link = 4; /* index of .strtab section */ + + /* + * setup symbols string table + * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry + */ + symlen = 2 + strlen(sym); + strsym = calloc(1, symlen); + if (!strsym) { + warnx("cannot allocate strsym"); + goto error; + } + strcpy(strsym + 1, sym); + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = strsym; + d->d_type = ELF_T_BYTE; + d->d_size = symlen; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 25; /* offset in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup build-id section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + /* + * build-id generation + */ + gen_build_id(&bnote, load_addr, code, csize); + bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ + bnote.desc.descsz = sizeof(bnote.build_id); + bnote.desc.type = NT_GNU_BUILD_ID; + strcpy(bnote.name, "GNU"); + + d->d_align = 
4; + d->d_off = 0LL; + d->d_buf = &bnote; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(bnote); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 33; /* offset in shd_string_table */ + shdr->sh_type = SHT_NOTE; + shdr->sh_addr = 0x0; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_size = sizeof(bnote); + shdr->sh_entsize = 0; + + if (debug && nr_debug_entries) { + retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); + if (retval) + goto error; + } else { + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update 4 failed"); + goto error; + } + } + + retval = 0; +error: + (void)elf_end(e); + + free(strsym); + + + return retval; +} + +#ifndef JVMTI + +static unsigned char x86_code[] = { + 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */ + 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */ + 0xCD, 0x80 /* int $0x80 */ +}; + +static struct options options; + +int main(int argc, char **argv) +{ + int c, fd, ret; + + while ((c = getopt(argc, argv, "o:h")) != -1) { + switch (c) { + case 'o': + options.output = optarg; + break; + case 'h': + printf("Usage: genelf -o output_file [-h]\n"); + return 0; + default: + errx(1, "unknown option"); + } + } + + fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + err(1, "cannot create file %s", options.output); + + ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code)); + close(fd); + + if (ret != 0) + unlink(options.output); + + return ret; +} +#endif diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h new file mode 100644 index 000000000000..2fbeb59c4bdd --- /dev/null +++ b/tools/perf/util/genelf.h @@ -0,0 +1,61 @@ +#ifndef __GENELF_H__ +#define __GENELF_H__ + +/* genelf.c */ +int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize, void *debug, int nr_debug_entries); +/* genelf_debug.c */ +int jit_add_debug_info(Elf *e, uint64_t code_addr, void 
*debug, int nr_debug_entries); + +#if defined(__arm__) +#define GEN_ELF_ARCH EM_ARM +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__aarch64__) +#define GEN_ELF_ARCH EM_AARCH64 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__x86_64__) +#define GEN_ELF_ARCH EM_X86_64 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__i386__) +#define GEN_ELF_ARCH EM_386 +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__powerpc64__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpc__) +#define GEN_ELF_ARCH EM_PPC +#define GEN_ELF_CLASS ELFCLASS32 +#else +#error "unsupported architecture" +#endif + +#if __BYTE_ORDER == __BIG_ENDIAN +#define GEN_ELF_ENDIAN ELFDATA2MSB +#else +#define GEN_ELF_ENDIAN ELFDATA2LSB +#endif + +#if GEN_ELF_CLASS == ELFCLASS64 +#define elf_newehdr elf64_newehdr +#define elf_getshdr elf64_getshdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF64_ST_BIND(a) +#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) +#else +#define elf_newehdr elf32_newehdr +#define elf_getshdr elf32_getshdr +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF32_ST_BIND(a) +#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a) +#endif + +/* The .text section is directly after the ELF header */ +#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr) + +#endif diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c new file mode 100644 index 000000000000..5980f7d256b1 --- /dev/null +++ b/tools/perf/util/genelf_debug.c @@ -0,0 +1,610 @@ +/* + * genelf_debug.c + * Copyright (C) 2015, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@google.com> + * + * Released under the GPL v2. 
+ * + * based on GPLv2 source code from Oprofile + * @remark Copyright 2007 OProfile authors + * @author Philippe Elie + */ +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define BUFFER_EXT_DFL_SIZE (4 * 1024) + +typedef uint32_t uword; +typedef uint16_t uhalf; +typedef int32_t sword; +typedef int16_t shalf; +typedef uint8_t ubyte; +typedef int8_t sbyte; + +struct buffer_ext { + size_t cur_pos; + size_t max_sz; + void *data; +}; + +static void +buffer_ext_dump(struct buffer_ext *be, const char *msg) +{ + size_t i; + warnx("DUMP for %s", msg); + for (i = 0 ; i < be->cur_pos; i++) + warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff); +} + +static inline int +buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz) +{ + void *tmp; + size_t be_sz = be->max_sz; + +retry: + if ((be->cur_pos + sz) < be_sz) { + memcpy(be->data + be->cur_pos, addr, sz); + be->cur_pos += sz; + return 0; + } + + if (!be_sz) + be_sz = BUFFER_EXT_DFL_SIZE; + else + be_sz <<= 1; + + tmp = realloc(be->data, be_sz); + if (!tmp) + return -1; + + be->data = tmp; + be->max_sz = be_sz; + + goto retry; +} + +static void +buffer_ext_init(struct buffer_ext *be) +{ + be->data = NULL; + be->cur_pos = 0; + be->max_sz = 0; +} + +static inline size_t +buffer_ext_size(struct buffer_ext *be) +{ + return be->cur_pos; +} + +static inline void * +buffer_ext_addr(struct buffer_ext *be) +{ + return be->data; +} + +struct debug_line_header { + // Not counting this field + uword total_length; + // version number (2 currently) + uhalf version; + // relative offset from next field to + // program statement + uword prolog_length; + ubyte minimum_instruction_length; + ubyte default_is_stmt; + // line_base - see DWARF 2 specs + sbyte 
line_base; + // line_range - see DWARF 2 specs + ubyte line_range; + // number of opcode + 1 + ubyte opcode_base; + /* follow the array of opcode args nr: ubytes [nr_opcode_base] */ + /* follow the search directories index, zero terminated string + * terminated by an empty string. + */ + /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is + * the directory index entry, 0 means current directory, then mtime + * and filesize, last entry is followed by en empty string. + */ + /* follow the first program statement */ +} __attribute__((packed)); + +/* DWARF 2 spec talk only about one possible compilation unit header while + * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not + * related to the used arch, an ELF 32 can hold more than 4 Go of debug + * information. For now we handle only DWARF 2 32 bits comp unit. It'll only + * become a problem if we generate more than 4GB of debug information. + */ +struct compilation_unit_header { + uword total_length; + uhalf version; + uword debug_abbrev_offset; + ubyte pointer_size; +} __attribute__((packed)); + +#define DW_LNS_num_opcode (DW_LNS_set_isa + 1) + +/* field filled at run time are marked with -1 */ +static struct debug_line_header const default_debug_line_header = { + .total_length = -1, + .version = 2, + .prolog_length = -1, + .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */ + .default_is_stmt = 1, /* we don't take care about basic block */ + .line_base = -5, /* sensible value for line base ... */ + .line_range = -14, /* ... 
and line range are guessed statically */ + .opcode_base = DW_LNS_num_opcode +}; + +static ubyte standard_opcode_length[] = +{ + 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 +}; +#if 0 +{ + [DW_LNS_advance_pc] = 1, + [DW_LNS_advance_line] = 1, + [DW_LNS_set_file] = 1, + [DW_LNS_set_column] = 1, + [DW_LNS_fixed_advance_pc] = 1, + [DW_LNS_set_isa] = 1, +}; +#endif + +/* field filled at run time are marked with -1 */ +static struct compilation_unit_header default_comp_unit_header = { + .total_length = -1, + .version = 2, + .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */ + .pointer_size = sizeof(void *) +}; + +static void emit_uword(struct buffer_ext *be, uword data) +{ + buffer_ext_add(be, &data, sizeof(uword)); +} + +static void emit_string(struct buffer_ext *be, const char *s) +{ + buffer_ext_add(be, (void *)s, strlen(s) + 1); +} + +static void emit_unsigned_LEB128(struct buffer_ext *be, + unsigned long data) +{ + do { + ubyte cur = data & 0x7F; + data >>= 7; + if (data) + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } while (data); +} + +static void emit_signed_LEB128(struct buffer_ext *be, long data) +{ + int more = 1; + int negative = data < 0; + int size = sizeof(long) * CHAR_BIT; + while (more) { + ubyte cur = data & 0x7F; + data >>= 7; + if (negative) + data |= - (1 << (size - 7)); + if ((data == 0 && !(cur & 0x40)) || + (data == -1l && (cur & 0x40))) + more = 0; + else + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } +} + +static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode, + void *data, size_t data_len) +{ + buffer_ext_add(be, (char *)"", 1); + + emit_unsigned_LEB128(be, data_len + 1); + + buffer_ext_add(be, &opcode, 1); + buffer_ext_add(be, data, data_len); +} + +static void emit_opcode(struct buffer_ext *be, ubyte opcode) +{ + buffer_ext_add(be, &opcode, 1); +} + +static void emit_opcode_signed(struct buffer_ext *be, + ubyte opcode, long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_signed_LEB128(be, data); 
+} + +static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode, + unsigned long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_unsigned_LEB128(be, data); +} + +static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc) +{ + emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc); +} + +static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno) +{ + emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno); +} + +static void emit_lne_end_of_sequence(struct buffer_ext *be) +{ + emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0); +} + +static void emit_set_file(struct buffer_ext *be, unsigned long idx) +{ + emit_opcode_unsigned(be, DW_LNS_set_file, idx); +} + +static void emit_lne_define_filename(struct buffer_ext *be, + const char *filename) +{ + buffer_ext_add(be, (void *)"", 1); + + /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */ + emit_unsigned_LEB128(be, strlen(filename) + 5); + emit_opcode(be, DW_LNE_define_file); + emit_string(be, filename); + /* directory index 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* last modification date on file 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* filesize 0=do not know */ + emit_unsigned_LEB128(be, 0); +} + +static void emit_lne_set_address(struct buffer_ext *be, + void *address) +{ + emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long)); +} + +static ubyte get_special_opcode(struct debug_entry *ent, + unsigned int last_line, + unsigned long last_vma) +{ + unsigned int temp; + unsigned long delta_addr; + + /* + * delta from line_base + */ + temp = (ent->lineno - last_line) - default_debug_line_header.line_base; + + if (temp >= default_debug_line_header.line_range) + return 0; + + /* + * delta of addresses + */ + delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length; + + /* This is not sufficient to ensure opcode will be in [0-256] but + * sufficient 
to ensure when summing with the delta lineno we will + * not overflow the unsigned long opcode */ + + if (delta_addr <= 256 / default_debug_line_header.line_range) { + unsigned long opcode = temp + + (delta_addr * default_debug_line_header.line_range) + + default_debug_line_header.opcode_base; + + return opcode <= 255 ? opcode : 0; + } + return 0; +} + +static void emit_lineno_info(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + size_t i; + + /* + * Machine state at start of a statement program + * address = 0 + * file = 1 + * line = 1 + * column = 0 + * is_stmt = default_is_stmt as given in the debug_line_header + * basic block = 0 + * end sequence = 0 + */ + + /* start state of the state machine we take care of */ + unsigned long last_vma = code_addr; + char const *cur_filename = NULL; + unsigned long cur_file_idx = 0; + int last_line = 1; + + emit_lne_set_address(be, (void *)code_addr); + + for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) { + int need_copy = 0; + ubyte special_opcode; + + /* + * check if filename changed, if so add it + */ + if (!cur_filename || strcmp(cur_filename, ent->name)) { + emit_lne_define_filename(be, ent->name); + cur_filename = ent->name; + emit_set_file(be, ++cur_file_idx); + need_copy = 1; + } + + special_opcode = get_special_opcode(ent, last_line, last_vma); + if (special_opcode != 0) { + last_line = ent->lineno; + last_vma = ent->addr; + emit_opcode(be, special_opcode); + } else { + /* + * lines differ, emit line delta + */ + if (last_line != ent->lineno) { + emit_advance_lineno(be, ent->lineno - last_line); + last_line = ent->lineno; + need_copy = 1; + } + /* + * addresses differ, emit address delta + */ + if (last_vma != ent->addr) { + emit_advance_pc(be, ent->addr - last_vma); + last_vma = ent->addr; + need_copy = 1; + } + /* + * add new row to matrix + */ + if (need_copy) + emit_opcode(be, DW_LNS_copy); + } + } +} + +static void add_debug_line(struct buffer_ext *be, 
+ struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + struct debug_line_header * dbg_header; + size_t old_size; + + old_size = buffer_ext_size(be); + + buffer_ext_add(be, (void *)&default_debug_line_header, + sizeof(default_debug_line_header)); + + buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length)); + + // empty directory entry + buffer_ext_add(be, (void *)"", 1); + + // empty filename directory + buffer_ext_add(be, (void *)"", 1); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->prolog_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, minimum_instruction_length); + + emit_lineno_info(be, ent, nr_entry, code_addr); + + emit_lne_end_of_sequence(be); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, version); +} + +static void +add_debug_abbrev(struct buffer_ext *be) +{ + emit_unsigned_LEB128(be, 1); + emit_unsigned_LEB128(be, DW_TAG_compile_unit); + emit_unsigned_LEB128(be, DW_CHILDREN_yes); + emit_unsigned_LEB128(be, DW_AT_stmt_list); + emit_unsigned_LEB128(be, DW_FORM_data4); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); +} + +static void +add_compilation_unit(struct buffer_ext *be, + size_t offset_debug_line) +{ + struct compilation_unit_header *comp_unit_header; + size_t old_size = buffer_ext_size(be); + + buffer_ext_add(be, &default_comp_unit_header, + sizeof(default_comp_unit_header)); + + emit_unsigned_LEB128(be, 1); + emit_uword(be, offset_debug_line); + + comp_unit_header = buffer_ext_addr(be) + old_size; + comp_unit_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct compilation_unit_header, version); +} + +static int +jit_process_debug_info(uint64_t code_addr, + void *debug, int nr_debug_entries, + struct buffer_ext *dl, + struct buffer_ext *da, + struct buffer_ext *di) +{ + struct debug_entry 
*ent = debug; + int i; + + for (i = 0; i < nr_debug_entries; i++) { + ent->addr = ent->addr - code_addr; + ent = debug_entry_next(ent); + } + add_compilation_unit(di, buffer_ext_size(dl)); + add_debug_line(dl, debug, nr_debug_entries, 0); + add_debug_abbrev(da); + if (0) buffer_ext_dump(da, "abbrev"); + + return 0; +} + +int +jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries) +{ + Elf_Data *d; + Elf_Scn *scn; + Elf_Shdr *shdr; + struct buffer_ext dl, di, da; + int ret; + + buffer_ext_init(&dl); + buffer_ext_init(&di); + buffer_ext_init(&da); + + ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di); + if (ret) + return -1; + /* + * setup .debug_line section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&dl); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&dl); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 52; /* .debug_line */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_info section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&di); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&di); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 64; /* .debug_info */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + 
shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_abbrev section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&da); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&da); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 76; /* .debug_abbrev */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * now we update the ELF image with all the sections + */ + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update debug failed"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f50b7235ecb6..90680ec9f8b8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,6 +23,8 @@ #include "strbuf.h" #include "build-id.h" #include "data.h" +#include <api/fs/fs.h> +#include "asm/bug.h" /* * magic2 = "PERFILE2" @@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h, return err; } +static int cpu_cache_level__sort(const void *a, const void *b) +{ + struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; + struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b; + + return cache_a->level - cache_b->level; +} + +static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b) +{ + if (a->level != b->level) + return false; + + if (a->line_size != b->line_size) + return false; + + if (a->sets != b->sets) + return false; + + if (a->ways != b->ways) + return false; + + if (strcmp(a->type, b->type)) + return false; + + if (strcmp(a->size, b->size)) + return false; + + if (strcmp(a->map, b->map)) + return false; + + return true; +} + 
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level) +{ + char path[PATH_MAX], file[PATH_MAX]; + struct stat st; + size_t len; + + scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level); + scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path); + + if (stat(file, &st)) + return 1; + + scnprintf(file, PATH_MAX, "%s/level", path); + if (sysfs__read_int(file, (int *) &cache->level)) + return -1; + + scnprintf(file, PATH_MAX, "%s/coherency_line_size", path); + if (sysfs__read_int(file, (int *) &cache->line_size)) + return -1; + + scnprintf(file, PATH_MAX, "%s/number_of_sets", path); + if (sysfs__read_int(file, (int *) &cache->sets)) + return -1; + + scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path); + if (sysfs__read_int(file, (int *) &cache->ways)) + return -1; + + scnprintf(file, PATH_MAX, "%s/type", path); + if (sysfs__read_str(file, &cache->type, &len)) + return -1; + + cache->type[len] = 0; + cache->type = rtrim(cache->type); + + scnprintf(file, PATH_MAX, "%s/size", path); + if (sysfs__read_str(file, &cache->size, &len)) { + free(cache->type); + return -1; + } + + cache->size[len] = 0; + cache->size = rtrim(cache->size); + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { + free(cache->size); /* map read failed; release the size string read above */ + free(cache->type); + return -1; + } + + cache->map[len] = 0; + cache->map = rtrim(cache->map); + return 0; +} + +static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) +{ + fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); +} + +static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) +{ + u32 i, cnt = 0; + long ncpus; + u32 nr, cpu; + u16 level; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return -1; + + nr = (u32)(ncpus & UINT_MAX); + + for (cpu = 0; cpu < nr; cpu++) { + for (level = 0; level < 10; level++) { + struct cpu_cache_level c; + int err; + + err = 
cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; + + if (err == 1) + break; + + for (i = 0; i < cnt; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) + break; + } + + if (i == cnt) + caches[cnt++] = c; + else + cpu_cache_level__free(&c); + + if (WARN_ONCE(cnt == size, "way too many cpu caches..")) + goto out; + } + } + out: + *cntp = cnt; + return 0; +} + +#define MAX_CACHES 2000 + +static int write_cache(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct cpu_cache_level caches[MAX_CACHES]; + u32 cnt = 0, i, version = 1; + int ret; + + ret = build_caches(caches, MAX_CACHES, &cnt); + if (ret) + goto out; + + qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); + + ret = do_write(fd, &version, sizeof(u32)); + if (ret < 0) + goto out; + + ret = do_write(fd, &cnt, sizeof(u32)); + if (ret < 0) + goto out; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level *c = &caches[i]; + + #define _W(v) \ + ret = do_write(fd, &c->v, sizeof(u32)); \ + if (ret < 0) \ + goto out; + + _W(level) + _W(line_size) + _W(sets) + _W(ways) + #undef _W + + #define _W(v) \ + ret = do_write_string(fd, (const char *) c->v); \ + if (ret < 0) \ + goto out; + + _W(type) + _W(size) + _W(map) + #undef _W + } + +out: + for (i = 0; i < cnt; i++) + cpu_cache_level__free(&caches[i]); + return ret; +} + static int write_stat(int fd __maybe_unused, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) @@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains stat data\n"); } +static void print_cache(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp __maybe_unused) +{ + int i; + + fprintf(fp, "# CPU cache info:\n"); + for (i = 0; i < ph->env.caches_cnt; i++) { + fprintf(fp, "# "); + cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + } +} + static void print_pmu_mappings(struct perf_header *ph, int fd 
__maybe_unused, FILE *fp) { @@ -1665,11 +1872,6 @@ static int process_cpu_topology(struct perf_file_section *section, if (ph->needs_swap) nr = bswap_32(nr); - if (nr > (u32)cpu_nr) { - pr_debug("core_id number is too big." - "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } ph->env.cpu[i].core_id = nr; ret = readn(fd, &nr, sizeof(nr)); @@ -1920,6 +2122,68 @@ static int process_auxtrace(struct perf_file_section *section, return err; } +static int process_cache(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, int fd __maybe_unused, + void *data __maybe_unused) +{ + struct cpu_cache_level *caches; + u32 cnt, i, version; + + if (readn(fd, &version, sizeof(version)) != sizeof(version)) + return -1; + + if (ph->needs_swap) + version = bswap_32(version); + + if (version != 1) + return -1; + + if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + return -1; + + if (ph->needs_swap) + cnt = bswap_32(cnt); + + caches = zalloc(sizeof(*caches) * cnt); + if (!caches) + return -1; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level c; + + #define _R(v) \ + if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + goto out_free_caches; \ + if (ph->needs_swap) \ + c.v = bswap_32(c.v); \ + + _R(level) + _R(line_size) + _R(sets) + _R(ways) + #undef _R + + #define _R(v) \ + c.v = do_read_string(fd, ph); \ + if (!c.v) \ + goto out_free_caches; + + _R(type) + _R(size) + _R(map) + #undef _R + + caches[i] = c; + } + + ph->env.caches = caches; + ph->env.caches_cnt = cnt; + return 0; +out_free_caches: + free(caches); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1962,6 +2226,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_GROUP_DESC, group_desc), FEAT_OPP(HEADER_AUXTRACE, auxtrace), FEAT_OPA(HEADER_STAT, stat), + FEAT_OPF(HEADER_CACHE, cache), }; struct header_print_data { 
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index cff9892452ee..d306ca118449 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -32,6 +32,7 @@ enum { HEADER_GROUP_DESC, HEADER_AUXTRACE, HEADER_STAT, + HEADER_CACHE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -120,7 +121,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, +int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index dc1e41c9b054..43a98a4dc1e1 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -6,7 +6,8 @@ static int autocorrect; static struct cmdnames aliases; -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) +static int perf_unknown_cmd_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); @@ -14,7 +15,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "alias.")) add_cmdname(&aliases, var + 6, strlen(var + 6)); - return perf_default_config(var, value, cb); + return 0; } static int levenshtein_compare(const void *p1, const void *p2) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 68a7612019dc..31c4641fe5ff 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -179,6 +179,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); + + 
if (h->trace_output) + hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -245,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat) /* XXX need decay for weight too? */ } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he); + static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; @@ -260,21 +265,45 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) diff = prev_period - he->stat.period; - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + if (!he->depth) { + hists->stats.total_period -= diff; + if (!he->filtered) + hists->stats.total_non_filtered_period -= diff; + } + + if (!he->leaf) { + struct hist_entry *child; + struct rb_node *node = rb_first(&he->hroot_out); + while (node) { + child = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + if (hists__decay_entry(hists, child)) + hists__delete_entry(hists, child); + } + } return he->stat.period == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) { - rb_erase(&he->rb_node, &hists->entries); + struct rb_root *root_in; + struct rb_root *root_out; - if (sort__need_collapse) - rb_erase(&he->rb_node_in, &hists->entries_collapsed); - else - rb_erase(&he->rb_node_in, hists->entries_in); + if (he->parent_he) { + root_in = &he->parent_he->hroot_in; + root_out = &he->parent_he->hroot_out; + } else { + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + root_out = &hists->entries; + } + + rb_erase(&he->rb_node_in, root_in); + rb_erase(&he->rb_node, root_out); --hists->nr_entries; if (!he->filtered) @@ -393,6 +422,9 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); + + if 
(!symbol_conf.report_hierarchy) + he->leaf = true; } return he; @@ -405,6 +437,16 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +{ + if (!symbol_conf.use_callchain) + return; + + he->hists->callchain_period += period; + if (!he->filtered) + he->hists->callchain_non_filtered_period += period; +} + static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *entry, struct addr_location *al, @@ -432,8 +474,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - if (sample_self) + if (sample_self) { he_stat__add_period(&he->stat, period, weight); + hist_entry__add_callchain_period(he, period); + } if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, period, weight); @@ -466,6 +510,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!he) return NULL; + if (sample_self) + hist_entry__add_callchain_period(he, period); hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); @@ -624,7 +670,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al } static int -iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, +iter_add_single_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { /* to avoid calling callback function */ @@ -951,10 +997,15 @@ out: int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -966,10 +1017,15 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t 
hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->collapse(fmt, left, right); if (cmp) break; @@ -1006,17 +1062,250 @@ void hist_entry__delete(struct hist_entry *he) } /* + * If this is not the last column, then we need to pad it according to the + * pre-calculated max length for this column, otherwise don't bother adding + * spaces because that would break viewing this with, for instance, 'less', + * that would show tons of trailing spaces when a long C++ demangled method + * name is sampled. +*/ +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed) +{ + if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) { + const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists)); + if (printed < width) { + advance_hpp(hpp, printed); + printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " "); + } + } + + return printed; +} + +/* * collapse the histogram */ -bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, - struct rb_root *root, struct hist_entry *he) +static void hists__apply_filters(struct hists *hists, struct hist_entry *he); +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he, + enum hist_filter type); + +typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt); + +static bool check_thread_entry(struct perf_hpp_fmt *fmt) +{ + return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt); +} + +static void hist_entry__check_and_remove_filter(struct hist_entry *he, + enum hist_filter type, + fmt_chk_fn check) +{ + struct perf_hpp_fmt *fmt; + bool type_match = false; + struct hist_entry *parent = he->parent_he; + + switch (type) { 
+ case HIST_FILTER__THREAD: + if (symbol_conf.comm_list == NULL && + symbol_conf.pid_list == NULL && + symbol_conf.tid_list == NULL) + return; + break; + case HIST_FILTER__DSO: + if (symbol_conf.dso_list == NULL) + return; + break; + case HIST_FILTER__SYMBOL: + if (symbol_conf.sym_list == NULL) + return; + break; + case HIST_FILTER__PARENT: + case HIST_FILTER__GUEST: + case HIST_FILTER__HOST: + case HIST_FILTER__SOCKET: + default: + return; + } + + /* if it's filtered by own fmt, it has to have filter bits */ + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (check(fmt)) { + type_match = true; + break; + } + } + + if (type_match) { + /* + * If the filter is for current level entry, propagate + * filter marker to parents. The marker bit was + * already set by default so it only needs to clear + * non-filtered entries. + */ + if (!(he->filtered & (1 << type))) { + while (parent) { + parent->filtered &= ~(1 << type); + parent = parent->parent_he; + } + } + } else { + /* + * If current entry doesn't have matching formats, set + * filter marker for upper level entries. it will be + * cleared if its lower level entries is not filtered. + * + * For lower-level entries, it inherits parent's + * filter bit so that lower level entries of a + * non-filtered entry won't set the filter marker. 
+ */ + if (parent == NULL) + he->filtered |= (1 << type); + else + he->filtered |= (parent->filtered & (1 << type)); + } +} + +static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) +{ + hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD, + check_thread_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO, + perf_hpp__is_dso_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, + perf_hpp__is_sym_entry); + + hists__apply_filters(he->hists, he); +} + +static struct hist_entry *hierarchy_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he, + struct hist_entry *parent_he, + struct perf_hpp_list *hpp_list) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter, *new; + struct perf_hpp_fmt *fmt; + int64_t cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node_in); + + cmp = 0; + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + cmp = fmt->collapse(fmt, iter, he); + if (cmp) + break; + } + + if (!cmp) { + he_stat__add_stat(&iter->stat, &he->stat); + return iter; + } + + if (cmp < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + new = hist_entry__new(he, true); + if (new == NULL) + return NULL; + + hists->nr_entries++; + + /* save related format list for output */ + new->hpp_list = hpp_list; + new->parent_he = parent_he; + + hist_entry__apply_hierarchy_filters(new); + + /* some fields are now passed to 'new' */ + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + he->trace_output = NULL; + else + new->trace_output = NULL; + + if (perf_hpp__is_srcline_entry(fmt)) + he->srcline = NULL; + else + new->srcline = NULL; + + if (perf_hpp__is_srcfile_entry(fmt)) + he->srcfile = NULL; + else + new->srcfile = NULL; + } + + rb_link_node(&new->rb_node_in, parent, p); + rb_insert_color(&new->rb_node_in, root); + 
return new; +} + +static int hists__hierarchy_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he) +{ + struct perf_hpp_list_node *node; + struct hist_entry *new_he = NULL; + struct hist_entry *parent = NULL; + int depth = 0; + int ret = 0; + + list_for_each_entry(node, &hists->hpp_formats, list) { + /* skip period (overhead) and elided columns */ + if (node->level == 0 || node->skip) + continue; + + /* insert copy of 'he' for each fmt into the hierarchy */ + new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp); + if (new_he == NULL) { + ret = -1; + break; + } + + root = &new_he->hroot_in; + new_he->depth = depth++; + parent = new_he; + } + + if (new_he) { + new_he->leaf = true; + + if (symbol_conf.use_callchain) { + callchain_cursor_reset(&callchain_cursor); + if (callchain_merge(&callchain_cursor, + new_he->callchain, + he->callchain) < 0) + ret = -1; + } + } + + /* 'he' is no longer used */ + hist_entry__delete(he); + + /* return 0 (or -1) since it already applied filters */ + return ret; +} + +int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; int64_t cmp; + if (symbol_conf.report_hierarchy) + return hists__hierarchy_insert_entry(hists, root, he); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); @@ -1024,18 +1313,21 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, cmp = hist_entry__collapse(iter, he); if (!cmp) { + int ret = 0; + he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); - callchain_merge(&callchain_cursor, - iter->callchain, - he->callchain); + if (callchain_merge(&callchain_cursor, + iter->callchain, + he->callchain) < 0) + ret = -1; } 
hist_entry__delete(he); - return false; + return ret; } if (cmp < 0) @@ -1047,7 +1339,7 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); - return true; + return 1; } struct rb_root *hists__get_rotate_entries_in(struct hists *hists) @@ -1073,14 +1365,15 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_socket(hists, he); } -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; + int ret; if (!sort__need_collapse) - return; + return 0; hists->nr_entries = 0; @@ -1095,7 +1388,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) next = rb_next(&n->rb_node_in); rb_erase(&n->rb_node_in, root); - if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) { + ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); + if (ret < 0) + return -1; + + if (ret) { /* * If it wasn't combined with one of the entries already * collapsed, we need to apply the filters that may have @@ -1106,14 +1403,16 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (prog) ui_progress__update(prog, 1); } + return 0; } static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { + struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { if (perf_hpp__should_skip(fmt, a->hists)) continue; @@ -1154,6 +1453,113 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->stats.total_period += h->stat.period; } +static void hierarchy_recalc_total_periods(struct hists *hists) +{ + struct rb_node *node; + struct hist_entry *he; + + node = rb_first(&hists->entries); + + 
hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; + + /* + * recalculate total period using top-level entries only + * since lower level entries only see non-filtered entries + * but upper level entries have sum of both entries. + */ + while (node) { + he = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + hists->stats.total_period += he->stat.period; + if (!he->filtered) + hists->stats.total_non_filtered_period += he->stat.period; + } +} + +static void hierarchy_insert_output_entry(struct rb_root *root, + struct hist_entry *he) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + struct perf_hpp_fmt *fmt; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); + + /* update column width of dynamic entry */ + perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt)) + fmt->sort(fmt, he, NULL); + } +} + +static void hists__hierarchy_output_resort(struct hists *hists, + struct ui_progress *prog, + struct rb_root *root_in, + struct rb_root *root_out, + u64 min_callchain_hits, + bool use_callchain) +{ + struct rb_node *node; + struct hist_entry *he; + + *root_out = RB_ROOT; + node = rb_first(root_in); + + while (node) { + he = rb_entry(node, struct hist_entry, rb_node_in); + node = rb_next(node); + + hierarchy_insert_output_entry(root_out, he); + + if (prog) + ui_progress__update(prog, 1); + + if (!he->leaf) { + hists__hierarchy_output_resort(hists, prog, + &he->hroot_in, + &he->hroot_out, + min_callchain_hits, + use_callchain); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + + continue; + } + + if (!use_callchain) + continue; + + if 
(callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + } +} + static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, u64 min_callchain_hits, @@ -1162,10 +1568,20 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; + struct perf_hpp_fmt *fmt; + + if (use_callchain) { + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; - if (use_callchain) + min_callchain_hits = total * (callchain_param.min_percent / 100); + } callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); + } while (*p != NULL) { parent = *p; @@ -1179,23 +1595,41 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, entries); + + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + perf_hpp__defined_dynamic_entry(fmt, he->hists)) + fmt->sort(fmt, he, NULL); /* update column width */ + } } -void hists__output_resort(struct hists *hists, struct ui_progress *prog) +static void output_resort(struct hists *hists, struct ui_progress *prog, + bool use_callchain) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; + u64 callchain_total; u64 min_callchain_hits; - struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain; - if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; - else - use_callchain = symbol_conf.use_callchain; + 
callchain_total = hists->callchain_period; + if (symbol_conf.filter_relative) + callchain_total = hists->callchain_non_filtered_period; - min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = callchain_total * (callchain_param.min_percent / 100); + + hists__reset_stats(hists); + hists__reset_col_len(hists); + + if (symbol_conf.report_hierarchy) { + hists__hierarchy_output_resort(hists, prog, + &hists->entries_collapsed, + &hists->entries, + min_callchain_hits, + use_callchain); + hierarchy_recalc_total_periods(hists); + return; + } if (sort__need_collapse) root = &hists->entries_collapsed; @@ -1205,9 +1639,6 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) next = rb_first(root); hists->entries = RB_ROOT; - hists__reset_stats(hists); - hists__reset_col_len(hists); - while (next) { n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); @@ -1223,15 +1654,136 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) } } +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +{ + bool use_callchain; + + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; + + output_resort(evsel__hists(evsel), prog, use_callchain); +} + +void hists__output_resort(struct hists *hists, struct ui_progress *prog) +{ + output_resort(hists, prog, symbol_conf.use_callchain); +} + +static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) +{ + if (he->leaf || hmd == HMD_FORCE_SIBLING) + return false; + + if (he->unfolded || hmd == HMD_FORCE_CHILD) + return true; + + return false; +} + +struct rb_node *rb_hierarchy_last(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + while (can_goto_child(he, HMD_NORMAL)) { + node = 
rb_last(&he->hroot_out); + he = rb_entry(node, struct hist_entry, rb_node); + } + return node; +} + +struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + if (can_goto_child(he, hmd)) + node = rb_first(&he->hroot_out); + else + node = rb_next(node); + + while (node == NULL) { + he = he->parent_he; + if (he == NULL) + break; + + node = rb_next(&he->rb_node); + } + return node; +} + +struct rb_node *rb_hierarchy_prev(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + node = rb_prev(node); + if (node) + return rb_hierarchy_last(node); + + he = he->parent_he; + if (he == NULL) + return NULL; + + return &he->rb_node; +} + +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) +{ + struct rb_node *node; + struct hist_entry *child; + float percent; + + if (he->leaf) + return false; + + node = rb_first(&he->hroot_out); + child = rb_entry(node, struct hist_entry, rb_node); + + while (node && child->filtered) { + node = rb_next(node); + child = rb_entry(node, struct hist_entry, rb_node); + } + + if (node) + percent = hist_entry__get_percent_limit(child); + else + percent = 0; + + return node && percent >= limit; +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); + + if (symbol_conf.report_hierarchy) { + struct hist_entry *parent = h->parent_he; + + while (parent) { + he_stat__add_stat(&parent->stat, &h->stat); + + parent->filtered &= ~(1 << filter); + + if (parent->filtered) + goto next; + + /* force fold unfiltered entry for simplicity */ + parent->unfolded = false; + parent->has_no_entry = false; + parent->row_offset = 0; + parent->nr_rows = 0; +next: + parent = parent->parent_he; + } + } + if (h->filtered) return; /* force fold unfiltered entry for simplicity */ h->unfolded = false; + h->has_no_entry = 
false; h->row_offset = 0; h->nr_rows = 0; @@ -1254,28 +1806,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists, return false; } -void hists__filter_by_dso(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (symbol_conf.exclude_other && !h->parent) - continue; - - if (hists__filter_entry_by_dso(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); - } -} - static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { @@ -1288,25 +1818,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, return false; } -void hists__filter_by_thread(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_thread(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); - } -} - static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he) { @@ -1320,7 +1831,21 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, return false; } -void hists__filter_by_symbol(struct hists *hists) +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he) +{ + if ((hists->socket_filter > -1) && + (he->socket != hists->socket_filter)) { + he->filtered |= (1 << HIST_FILTER__SOCKET); + return true; + } + + return false; +} + +typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); + +static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) { struct rb_node *nd; @@ -1332,42 
+1857,155 @@ void hists__filter_by_symbol(struct hists *hists) for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (hists__filter_entry_by_symbol(hists, h)) + if (filter(hists, h)) continue; - hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); + hists__remove_entry_filter(hists, h, type); } } -static bool hists__filter_entry_by_socket(struct hists *hists, - struct hist_entry *he) +static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) { - if ((hists->socket_filter > -1) && - (he->socket != hists->socket_filter)) { - he->filtered |= (1 << HIST_FILTER__SOCKET); - return true; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + struct rb_root new_root = RB_ROOT; + struct rb_node *nd; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; } - return false; + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); + + if (he->leaf || he->filtered) + return; + + nd = rb_first(&he->hroot_out); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &he->hroot_out); + + resort_filtered_entry(&new_root, h); + } + + he->hroot_out = new_root; } -void hists__filter_by_socket(struct hists *hists) +static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) { struct rb_node *nd; + struct rb_root new_root = RB_ROOT; hists->stats.nr_non_filtered_samples = 0; hists__reset_filter_stats(hists); hists__reset_col_len(hists); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + nd = rb_first(&hists->entries); + while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + int ret; - if (hists__filter_entry_by_socket(hists, h)) - continue; + ret = 
hist_entry__filter(h, type, arg); - hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + /* + * case 1. non-matching type + * zero out the period, set filter marker and move to child + */ + if (ret < 0) { + memset(&h->stat, 0, sizeof(h->stat)); + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD); + } + /* + * case 2. matched type (filter out) + * set filter marker and move to next + */ + else if (ret == 1) { + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + /* + * case 3. ok (not filtered) + * add period to hists and parents, erase the filter marker + * and move to next sibling + */ + else { + hists__remove_entry_filter(hists, h, type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + } + + hierarchy_recalc_total_periods(hists); + + /* + * resort output after applying a new filter since filter in a lower + * hierarchy can change periods in a upper hierarchy. + */ + nd = rb_first(&hists->entries); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &hists->entries); + + resort_filtered_entry(&new_root, h); } + + hists->entries = new_root; +} + +void hists__filter_by_thread(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__THREAD, + hists->thread_filter); + else + hists__filter_by_type(hists, HIST_FILTER__THREAD, + hists__filter_entry_by_thread); +} + +void hists__filter_by_dso(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__DSO, + hists->dso_filter); + else + hists__filter_by_type(hists, HIST_FILTER__DSO, + hists__filter_entry_by_dso); +} + +void hists__filter_by_symbol(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL, + hists->symbol_filter_str); + else + hists__filter_by_type(hists, HIST_FILTER__SYMBOL, + 
hists__filter_entry_by_symbol); +} + +void hists__filter_by_socket(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SOCKET, + &hists->socket_filter); + else + hists__filter_by_type(hists, HIST_FILTER__SOCKET, + hists__filter_entry_by_socket); } void events_stats__inc(struct events_stats *stats, u32 type) @@ -1585,7 +2223,7 @@ int perf_hist_config(const char *var, const char *value) return 0; } -int __hists__init(struct hists *hists) +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { memset(hists, 0, sizeof(*hists)); hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; @@ -1594,6 +2232,8 @@ int __hists__init(struct hists *hists) hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; + hists->hpp_list = hpp_list; + INIT_LIST_HEAD(&hists->hpp_formats); return 0; } @@ -1622,15 +2262,26 @@ static void hists__delete_all_entries(struct hists *hists) static void hists_evsel__exit(struct perf_evsel *evsel) { struct hists *hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt, *pos; + struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + + list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { + list_del(&fmt->list); + free(fmt); + } + list_del(&node->list); + free(node); + } } static int hists_evsel__init(struct perf_evsel *evsel) { struct hists *hists = evsel__hists(evsel); - __hists__init(hists); + __hists__init(hists, &perf_hpp_list); return 0; } @@ -1649,3 +2300,9 @@ int hists__init(void) return err; } + +void perf_hpp_list__init(struct perf_hpp_list *list) +{ + INIT_LIST_HEAD(&list->fields); + INIT_LIST_HEAD(&list->sorts); +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d4ec4822a103..bec0cd660fbd 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -66,6 +66,8 @@ struct hists { struct rb_root 
entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; + u64 callchain_period; + u64 callchain_non_filtered_period; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; @@ -75,6 +77,9 @@ struct hists { u64 event_stream; u16 col_len[HISTC_NR_COLS]; int socket_filter; + struct perf_hpp_list *hpp_list; + struct list_head hpp_formats; + int nr_hpp_node; }; struct hist_entry_iter; @@ -121,15 +126,21 @@ struct hist_entry *__hists__add_entry(struct hists *hists, int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); +struct perf_hpp; +struct perf_hpp_fmt; + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); @@ -185,10 +196,10 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) } int hists__init(void); -int __hists__init(struct hists *hists); +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); -bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, +int hists__collapse_insert_entry(struct hists *hists, struct 
rb_root *root, struct hist_entry *he); struct perf_hpp { @@ -214,28 +225,64 @@ struct perf_hpp_fmt { struct hist_entry *a, struct hist_entry *b); int64_t (*sort)(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b); + bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); + void (*free)(struct perf_hpp_fmt *fmt); struct list_head list; struct list_head sort_list; bool elide; int len; int user_len; + int idx; + int level; +}; + +struct perf_hpp_list { + struct list_head fields; + struct list_head sorts; }; -extern struct list_head perf_hpp__list; -extern struct list_head perf_hpp__sort_list; +extern struct perf_hpp_list perf_hpp_list; + +struct perf_hpp_list_node { + struct list_head list; + struct perf_hpp_list hpp; + int level; + bool skip; +}; + +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); + +static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) +{ + perf_hpp_list__column_register(&perf_hpp_list, format); +} + +static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__register_sort_field(&perf_hpp_list, format); +} + +#define perf_hpp_list__for_each_format(_list, format) \ + list_for_each_entry(format, &(_list)->fields, list) -#define perf_hpp__for_each_format(format) \ - list_for_each_entry(format, &perf_hpp__list, list) +#define perf_hpp_list__for_each_format_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->fields, list) -#define perf_hpp__for_each_format_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__list, list) +#define perf_hpp_list__for_each_sort_list(_list, format) \ + list_for_each_entry(format, &(_list)->sorts, sort_list) -#define perf_hpp__for_each_sort_list(format) \ - list_for_each_entry(format, &perf_hpp__sort_list, sort_list) +#define 
perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) -#define perf_hpp__for_each_sort_list_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list) +#define hists__for_each_format(hists, format) \ + perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + +#define hists__for_each_sort_list(hists, format) \ + perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) extern struct perf_hpp_fmt perf_hpp__format[]; @@ -254,21 +301,29 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_unregister(struct perf_hpp_fmt *format); -void perf_hpp__column_enable(unsigned col); -void perf_hpp__column_disable(unsigned col); void perf_hpp__cancel_cumulate(void); +void perf_hpp__setup_output_field(struct perf_hpp_list *list); +void perf_hpp__reset_output_field(struct perf_hpp_list *list); +void perf_hpp__append_sort_keys(struct perf_hpp_list *list); +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist); -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); -void perf_hpp__setup_output_field(void); -void perf_hpp__reset_output_field(void); -void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format); bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists); +bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_sym_entry(struct perf_hpp_fmt 
*fmt); + +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); + +int hist_entry__filter(struct hist_entry *he, int type, const void *arg); static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, struct hists *hists) @@ -372,13 +427,35 @@ static inline int script_browse(const char *script_opt __maybe_unused) #endif unsigned int hists__sort_list_width(struct hists *hists); +unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); struct option; -int parse_filter_percentage(const struct option *opt __maybe_unused, - const char *arg, int unset __maybe_unused); +int parse_filter_percentage(const struct option *opt, const char *arg, int unset); int perf_hist_config(const char *var, const char *value); +void perf_hpp_list__init(struct perf_hpp_list *list); + +enum hierarchy_move_dir { + HMD_NORMAL, + HMD_FORCE_SIBLING, + HMD_FORCE_CHILD, +}; + +struct rb_node *rb_hierarchy_last(struct rb_node *node); +struct rb_node *__rb_hierarchy_next(struct rb_node *node, + enum hierarchy_move_dir hmd); +struct rb_node *rb_hierarchy_prev(struct rb_node *node); + +static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) +{ + return __rb_hierarchy_next(node, HMD_NORMAL); +} + +#define HIERARCHY_INDENT 3 + +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); + #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index eb0e7f8bf515..abf1366e2a24 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -279,6 +279,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.misc = PERF_RECORD_MISC_USER; event.sample.header.size = sizeof(struct perf_event_header); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); sample.pid = btsq->pid; sample.tid = btsq->tid; @@ 
-678,7 +679,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, return 0; } -static int intel_bts_flush(struct perf_session *session __maybe_unused, +static int intel_bts_flush(struct perf_session *session, struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index 47314a64399c..9f26eae6c9f0 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -374,7 +374,7 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(char, insn); + insn->displacement.value = get_next(signed char, insn); insn->displacement.nbytes = 1; } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { @@ -532,7 +532,7 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(char, insn); + insn->immediate.value = get_next(signed char, insn); insn->immediate.nbytes = 1; break; case INAT_IMM_WORD: @@ -566,7 +566,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(char, insn); + insn->immediate2.value = get_next(signed char, insn); insn->immediate2.nbytes = 1; } done: diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 05d815851be1..407f11b97c8d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -979,6 +979,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; @@ -1035,6 +1036,7 @@ static int intel_pt_synth_instruction_sample(struct 
intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; @@ -1092,6 +1094,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h new file mode 100644 index 000000000000..3f42ee4d2a0b --- /dev/null +++ b/tools/perf/util/jit.h @@ -0,0 +1,11 @@ +#ifndef __JIT_H__ +#define __JIT_H__ + +#include <data.h> + +int jit_process(struct perf_session *session, struct perf_data_file *output, + struct machine *machine, char *filename, pid_t pid, u64 *nbytes); + +int jit_inject_record(const char *filename); + +#endif /* __JIT_H__ */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c new file mode 100644 index 000000000000..ad0c0bb1fbc7 --- /dev/null +++ b/tools/perf/util/jitdump.c @@ -0,0 +1,699 @@ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <byteswap.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include "util.h" +#include "event.h" +#include "debug.h" +#include "evlist.h" +#include "symbol.h" +#include "strlist.h" +#include <elf.h> + +#include "session.h" +#include "jit.h" +#include "jitdump.h" +#include "genelf.h" +#include "../builtin.h" + +struct jit_buf_desc { + struct perf_data_file *output; + struct perf_session *session; + struct machine *machine; + union jr_entry *entry; + void *buf; + uint64_t sample_type; + size_t bufsize; + FILE *in; + bool needs_bswap; /* handles cross-endianess */ + void *debug_data; + size_t nr_debug_entries; + uint32_t code_load_count; + u64 bytes_written; + 
struct rb_root code_root; + char dir[PATH_MAX]; +}; + +struct debug_line_info { + unsigned long vma; + unsigned int lineno; + /* The filename format is unspecified, absolute path, relative etc. */ + char const filename[0]; +}; + +struct jit_tool { + struct perf_tool tool; + struct perf_data_file output; + struct perf_data_file input; + u64 bytes_written; +}; + +#define hmax(a, b) ((a) > (b) ? (a) : (b)) +#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) + +static int +jit_emit_elf(char *filename, + const char *sym, + uint64_t code_addr, + const void *code, + int csize, + void *debug, + int nr_debug_entries) +{ + int ret, fd; + + if (verbose > 0) + fprintf(stderr, "write ELF image %s\n", filename); + + fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); + if (fd == -1) { + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); + return -1; + } + + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); + + close(fd); + + if (ret) + unlink(filename); + + return ret; +} + +static void +jit_close(struct jit_buf_desc *jd) +{ + if (!(jd && jd->in)) + return; + funlockfile(jd->in); + fclose(jd->in); + jd->in = NULL; +} + +static int +jit_validate_events(struct perf_session *session) +{ + struct perf_evsel *evsel; + + /* + * check that all events use CLOCK_MONOTONIC + */ + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) + return -1; + } + return 0; +} + +static int +jit_open(struct jit_buf_desc *jd, const char *name) +{ + struct jitheader header; + struct jr_prefix *prefix; + ssize_t bs, bsz = 0; + void *n, *buf = NULL; + int ret, retval = -1; + + jd->in = fopen(name, "r"); + if (!jd->in) + return -1; + + bsz = hmax(sizeof(header), sizeof(*prefix)); + + buf = malloc(bsz); + if (!buf) + goto error; + + /* + * protect from writer modifying the file while we are reading it + */ + flockfile(jd->in); + + ret = fread(buf, 
sizeof(header), 1, jd->in); + if (ret != 1) + goto error; + + memcpy(&header, buf, sizeof(header)); + + if (header.magic != JITHEADER_MAGIC) { + if (header.magic != JITHEADER_MAGIC_SW) + goto error; + jd->needs_bswap = true; + } + + if (jd->needs_bswap) { + header.version = bswap_32(header.version); + header.total_size = bswap_32(header.total_size); + header.pid = bswap_32(header.pid); + header.elf_mach = bswap_32(header.elf_mach); + header.timestamp = bswap_64(header.timestamp); + header.flags = bswap_64(header.flags); + } + + if (verbose > 2) + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + header.version, + header.total_size, + (unsigned long long)header.timestamp, + header.pid, + header.elf_mach); + + if (header.flags & JITDUMP_FLAGS_RESERVED) { + pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", + (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); + goto error; + } + + /* + * validate event is using the correct clockid + */ + if (jit_validate_events(jd->session)) { + pr_err("error, jitted code must be sampled with perf record -k 1\n"); + goto error; + } + + bs = header.total_size - sizeof(header); + + if (bs > bsz) { + n = realloc(buf, bs); + if (!n) + goto error; + bsz = bs; + buf = n; + /* read extra we do not know about */ + ret = fread(buf, bs - bsz, 1, jd->in); + if (ret != 1) + goto error; + } + /* + * keep dirname for generating files and mmap records + */ + strcpy(jd->dir, name); + dirname(jd->dir); + + return 0; +error: + funlockfile(jd->in); + fclose(jd->in); + return retval; +} + +static union jr_entry * +jit_get_next_entry(struct jit_buf_desc *jd) +{ + struct jr_prefix *prefix; + union jr_entry *jr; + void *addr; + size_t bs, size; + int id, ret; + + if (!(jd && jd->in)) + return NULL; + + if (jd->buf == NULL) { + size_t sz = getpagesize(); + if (sz < sizeof(*prefix)) + sz = sizeof(*prefix); + + jd->buf = malloc(sz); + if (jd->buf == NULL) + return NULL; + + jd->bufsize = sz; + } + + prefix = 
jd->buf; + + /* + * file is still locked at this point + */ + ret = fread(prefix, sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + if (jd->needs_bswap) { + prefix->id = bswap_32(prefix->id); + prefix->total_size = bswap_32(prefix->total_size); + prefix->timestamp = bswap_64(prefix->timestamp); + } + id = prefix->id; + size = prefix->total_size; + + bs = (size_t)size; + if (bs < sizeof(*prefix)) + return NULL; + + if (id >= JIT_CODE_MAX) { + pr_warning("next_entry: unknown prefix %d, skipping\n", id); + return NULL; + } + if (bs > jd->bufsize) { + void *n; + n = realloc(jd->buf, bs); + if (!n) + return NULL; + jd->buf = n; + jd->bufsize = bs; + } + + addr = ((void *)jd->buf) + sizeof(*prefix); + + ret = fread(addr, bs - sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + jr = (union jr_entry *)jd->buf; + + switch(id) { + case JIT_CODE_DEBUG_INFO: + if (jd->needs_bswap) { + uint64_t n; + jr->info.code_addr = bswap_64(jr->info.code_addr); + jr->info.nr_entry = bswap_64(jr->info.nr_entry); + for (n = 0 ; n < jr->info.nr_entry; n++) { + jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr); + jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno); + jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim); + } + } + break; + case JIT_CODE_CLOSE: + break; + case JIT_CODE_LOAD: + if (jd->needs_bswap) { + jr->load.pid = bswap_32(jr->load.pid); + jr->load.tid = bswap_32(jr->load.tid); + jr->load.vma = bswap_64(jr->load.vma); + jr->load.code_addr = bswap_64(jr->load.code_addr); + jr->load.code_size = bswap_64(jr->load.code_size); + jr->load.code_index= bswap_64(jr->load.code_index); + } + jd->code_load_count++; + break; + case JIT_CODE_MOVE: + if (jd->needs_bswap) { + jr->move.pid = bswap_32(jr->move.pid); + jr->move.tid = bswap_32(jr->move.tid); + jr->move.vma = bswap_64(jr->move.vma); + jr->move.old_code_addr = bswap_64(jr->move.old_code_addr); + jr->move.new_code_addr = bswap_64(jr->move.new_code_addr); + 
jr->move.code_size = bswap_64(jr->move.code_size); + jr->move.code_index = bswap_64(jr->move.code_index); + } + break; + case JIT_CODE_MAX: + default: + return NULL; + } + return jr; +} + +static int +jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) +{ + ssize_t size; + + size = perf_data_file__write(jd->output, event, event->header.size); + if (size < 0) + return -1; + + jd->bytes_written += size; + return 0; +} + +static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + uint64_t code, addr; + uintptr_t uaddr; + char *filename; + struct stat st; + size_t size; + u16 idr_size; + const char *sym; + uint32_t count; + int ret, csize; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->load.pid; + tid = jr->load.tid; + csize = jr->load.code_size; + addr = jr->load.code_addr; + sym = (void *)((unsigned long)jr + sizeof(jr->load)); + code = (unsigned long)jr + jr->load.p.total_size - csize; + count = jr->load.code_index; + idr_size = jd->machine->id_hdr_size; + + event = calloc(1, sizeof(*event) + idr_size); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + jd->dir, + pid, + count); + + size++; /* for \0 */ + + size = PERF_ALIGN(size, sizeof(u64)); + uaddr = (uintptr_t)code; + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); + + if (jd->debug_data && jd->nr_debug_entries) { + free(jd->debug_data); + jd->debug_data = NULL; + jd->nr_debug_entries = 0; + } + + if (ret) { + free(event); + return -1; + } + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + + 
event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = addr; + event->mmap2.len = csize; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.cpumode = PERF_RECORD_MISC_USER; + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + /* + * mark dso as use to generate buildid in the header + */ + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + char *filename; + size_t size; + struct stat st; + u16 idr_size; + int ret; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->move.pid; + tid = jr->move.tid; + idr_size = jd->machine->id_hdr_size; + + /* + * +16 to account for sample_id_all (hack) + */ + event = calloc(1, sizeof(*event) + 16); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + jd->dir, + pid, + jr->move.code_index); + + size++; /* for \0 */ + + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + size = PERF_ALIGN(size, sizeof(u64)); + + 
event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = jr->move.new_code_addr; + event->mmap2.len = jr->move.code_size; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.cpumode = PERF_RECORD_MISC_USER; + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = jr->move.new_code_addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) +{ + void *data; + size_t sz; + + if (!(jd && jr)) + return -1; + + sz = jr->prefix.total_size - sizeof(jr->info); + data = malloc(sz); + if (!data) + return -1; + + memcpy(data, &jr->info.entries, sz); + + jd->debug_data = data; + + /* + * we must use nr_entry instead of size here because + * we cannot distinguish actual entry from padding otherwise + */ + jd->nr_debug_entries = jr->info.nr_entry; + + return 0; +} + +static int +jit_process_dump(struct jit_buf_desc *jd) +{ + union jr_entry *jr; + int ret; + + while ((jr = jit_get_next_entry(jd))) { 
+ switch(jr->prefix.id) { + case JIT_CODE_LOAD: + ret = jit_repipe_code_load(jd, jr); + break; + case JIT_CODE_MOVE: + ret = jit_repipe_code_move(jd, jr); + break; + case JIT_CODE_DEBUG_INFO: + ret = jit_repipe_debug_info(jd, jr); + break; + default: + ret = 0; + continue; + } + } + return ret; +} + +static int +jit_inject(struct jit_buf_desc *jd, char *path) +{ + int ret; + + if (verbose > 0) + fprintf(stderr, "injecting: %s\n", path); + + ret = jit_open(jd, path); + if (ret) + return -1; + + ret = jit_process_dump(jd); + + jit_close(jd); + + if (verbose > 0) + fprintf(stderr, "injected: %s (%d)\n", path, ret); + + return 0; +} + +/* + * File must be with pattern .../jit-XXXX.dump + * where XXXX is the PID of the process which did the mmap() + * as captured in the RECORD_MMAP record + */ +static int +jit_detect(char *mmap_name, pid_t pid) + { + char *p; + char *end = NULL; + pid_t pid2; + + if (verbose > 2) + fprintf(stderr, "jit marker trying : %s\n", mmap_name); + /* + * get file name + */ + p = strrchr(mmap_name, '/'); + if (!p) + return -1; + + /* + * match prefix + */ + if (strncmp(p, "/jit-", 5)) + return -1; + + /* + * skip prefix + */ + p += 5; + + /* + * must be followed by a pid + */ + if (!isdigit(*p)) + return -1; + + pid2 = (int)strtol(p, &end, 10); + if (!end) + return -1; + + /* + * pid does not match mmap pid + * pid==0 in system-wide mode (synthesized) + */ + if (pid && pid2 != pid) + return -1; + /* + * validate suffix + */ + if (strcmp(end, ".dump")) + return -1; + + if (verbose > 0) + fprintf(stderr, "jit marker found: %s\n", mmap_name); + + return 0; +} + +int +jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes) +{ + struct perf_evsel *first; + struct jit_buf_desc jd; + int ret; + + /* + * first, detect marker mmap (i.e., the jitdump mmap) + */ + if (jit_detect(filename, pid)) + return 0; + + memset(&jd, 0, sizeof(jd)); + + jd.session = session; + 
jd.output = output; + jd.machine = machine; + + /* + * track sample_type to compute id_all layout + * perf sets the same sample type to all events as of now + */ + first = perf_evlist__first(session->evlist); + jd.sample_type = first->attr.sample_type; + + *nbytes = 0; + + ret = jit_inject(&jd, filename); + if (!ret) { + *nbytes = jd.bytes_written; + ret = 1; + } + + return ret; +} diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h new file mode 100644 index 000000000000..b66c1f503d9e --- /dev/null +++ b/tools/perf/util/jitdump.h @@ -0,0 +1,124 @@ +/* + * jitdump.h: jitted code info encapsulation file format + * + * Adapted from OProfile GPLv2 support jidump.h: + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#ifndef JITDUMP_H +#define JITDUMP_H + +#include <sys/time.h> +#include <time.h> +#include <stdint.h> + +/* JiTD */ +#define JITHEADER_MAGIC 0x4A695444 +#define JITHEADER_MAGIC_SW 0x4454694A + +#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7) + +#define JITHEADER_VERSION 1 + +enum jitdump_flags_bits { + JITDUMP_FLAGS_MAX_BIT, +}; + +#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? 
\ + (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) + +struct jitheader { + uint32_t magic; /* characters "jItD" */ + uint32_t version; /* header version */ + uint32_t total_size; /* total size of header */ + uint32_t elf_mach; /* elf mach target */ + uint32_t pad1; /* reserved */ + uint32_t pid; /* JIT process id */ + uint64_t timestamp; /* timestamp */ + uint64_t flags; /* flags */ +}; + +enum jit_record_type { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, + + JIT_CODE_MAX, +}; + +/* record prefix (mandatory in each record) */ +struct jr_prefix { + uint32_t id; + uint32_t total_size; + uint64_t timestamp; +}; + +struct jr_code_load { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct jr_code_close { + struct jr_prefix p; +}; + +struct jr_code_move { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t old_code_addr; + uint64_t new_code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct debug_entry { + uint64_t addr; + int lineno; /* source line number starting at 1 */ + int discrim; /* column discriminator, 0 is default */ + const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ +}; + +struct jr_code_debug_info { + struct jr_prefix p; + + uint64_t code_addr; + uint64_t nr_entry; + struct debug_entry entries[0]; +}; + +union jr_entry { + struct jr_code_debug_info info; + struct jr_code_close close; + struct jr_code_load load; + struct jr_code_move move; + struct jr_prefix prefix; +}; + +static inline struct debug_entry * +debug_entry_next(struct debug_entry *ent) +{ + void *a = ent + 1; + size_t l = strlen(ent->name) + 1; + return a + l; +} + +static inline char * +debug_entry_file(struct debug_entry *ent) +{ + void *a = ent + 1; + return a; +} + +#endif /* !JITDUMP_H */ diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 
ae825d4ec110..d01e73592f6e 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, bool kvm_exit_event(struct perf_evsel *evsel); bool kvm_entry_event(struct perf_evsel *evsel); +int setup_kvm_events_tp(struct perf_kvm_stat *kvm); #define define_exit_reasons_table(name, symbols) \ static struct exit_reasons_table name[] = { \ @@ -133,8 +134,13 @@ bool kvm_entry_event(struct perf_evsel *evsel); */ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid); -extern const char * const kvm_events_tp[]; +extern const char *kvm_events_tp[]; extern struct kvm_reg_events_ops kvm_reg_events_ops[]; extern const char * const kvm_skip_events[]; +extern const char *vcpu_id_str; +extern const int decode_str_len; +extern const char *kvm_exit_reason; +extern const char *kvm_entry_trace; +extern const char *kvm_exit_trace; #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 00724d496d38..33071d6159bc 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -3,11 +3,11 @@ * Copyright (C) 2015, Huawei Inc. 
*/ +#include <limits.h> #include <stdio.h> -#include "util.h" +#include <stdlib.h> #include "debug.h" #include "llvm-utils.h" -#include "cache.h" #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ @@ -98,11 +98,12 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) void *buf = NULL; FILE *file = NULL; size_t read_sz = 0, buf_sz = 0; + char serr[STRERR_BUFSIZE]; file = popen(cmd, "r"); if (!file) { pr_err("ERROR: unable to popen cmd: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); return -EINVAL; } @@ -136,7 +137,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) if (ferror(file)) { pr_err("ERROR: error occurred when reading from pipe: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); err = -EIO; goto errout; } @@ -334,10 +335,18 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, unsigned int kernel_version; char linux_version_code_str[64]; const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], nr_cpus_avail_str[64]; + char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; + char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + if (path[0] != '-' && realpath(path, abspath) == NULL) { + err = errno; + pr_err("ERROR: problems with path %s: %s\n", + path, strerror_r(err, serr, sizeof(serr))); + return -err; + } + if (!template) template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; @@ -362,7 +371,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (nr_cpus_avail <= 0) { pr_err( "WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", strerror(errno)); +" \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr))); nr_cpus_avail = 128; } snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", @@ -387,8 +396,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, * stdin to be 
source file (testing). */ force_set_env("CLANG_SOURCE", - (path[0] == '-') ? path : - make_nonrelative_path(path)); + (path[0] == '-') ? path : abspath); pr_debug("llvm compiling command template: %s\n", template); err = read_from_pipe(template, &obj_buf, &obj_buf_sz); diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index 5b3cf1c229e2..23b9a743fe72 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -39,11 +39,10 @@ struct llvm_param { }; extern struct llvm_param llvm_param; -extern int perf_llvm_config(const char *var, const char *value); +int perf_llvm_config(const char *var, const char *value); -extern int llvm__compile_bpf(const char *path, void **p_obj_buf, - size_t *p_obj_buf_sz); +int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); /* This function is for test__llvm() use only */ -extern int llvm__search_clang(void); +int llvm__search_clang(void); #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ad79297c76c8..80b9b6a87990 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1301,9 +1301,8 @@ out_problem: int machine__process_mmap2_event(struct machine *machine, union perf_event *event, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1312,8 +1311,8 @@ int machine__process_mmap2_event(struct machine *machine, if (dump_trace) perf_event__fprintf_mmap2(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; @@ -1355,9 +1354,8 @@ out_problem: } int machine__process_mmap_event(struct machine *machine, union perf_event *event, - struct 
perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1366,8 +1364,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_mmap(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2c2b443df5ba..8499db281158 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -94,7 +94,7 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); -int machine__process_switch_event(struct machine *machine __maybe_unused, +int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); @@ -180,6 +180,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine, } static inline +struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, + enum map_type type, const char *name, + struct map **mapp, + symbol_filter_t filter) +{ + return map_groups__find_symbol_by_name(&machine->kmaps, type, name, + mapp, filter); +} + +static inline struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, struct map **mapp, symbol_filter_t filter) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c new file mode 100644 index 000000000000..75465f89a413 --- /dev/null +++ b/tools/perf/util/mem-events.c @@ -0,0 +1,255 @@ +#include <stddef.h> 
+#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <api/fs/fs.h> +#include "mem-events.h" +#include "debug.h" +#include "symbol.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("ldlat-loads", "cpu/mem-loads,ldlat=30/P", "mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), +}; +#undef E + +#undef E + +char *perf_mem_events__name(int i) +{ + return (char *)perf_mem_events[i].name; +} + +int perf_mem_events__parse(const char *str) +{ + char *tok, *saveptr = NULL; + bool found = false; + char *buf; + int j; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(str) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, str); + + tok = strtok_r((char *)buf, ",", &saveptr); + + while (tok) { + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + if (strstr(e->tag, tok)) + e->record = found = true; + } + + tok = strtok_r(NULL, ",", &saveptr); + } + + free(buf); + + if (found) + return 0; + + pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); + return -1; +} + +int perf_mem_events__init(void) +{ + const char *mnt = sysfs__mount(); + bool found = false; + int j; + + if (!mnt) + return -ENOENT; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + char path[PATH_MAX]; + struct perf_mem_event *e = &perf_mem_events[j]; + struct stat st; + + scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + mnt, e->sysfs_name); + + if (!stat(path, &st)) + e->supported = found = true; + } + + return found ? 
0 : -ENOENT; +} + +static const char * const tlb_access[] = { + "N/A", + "HIT", + "MISS", + "L1", + "L2", + "Walker", + "Fault", +}; + +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t l = 0, i; + u64 m = PERF_MEM_TLB_NA; + u64 hit, miss; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_dtlb; + + hit = m & PERF_MEM_TLB_HIT; + miss = m & PERF_MEM_TLB_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); + + for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, tlb_access[i]); + } + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + if (hit) + l += scnprintf(out + l, sz - l, " hit"); + if (miss) + l += scnprintf(out + l, sz - l, " miss"); + + return l; +} + +static const char * const mem_lvl[] = { + "N/A", + "HIT", + "MISS", + "L1", + "LFB", + "L2", + "L3", + "Local RAM", + "Remote RAM (1 hop)", + "Remote RAM (2 hops)", + "Remote Cache (1 hop)", + "Remote Cache (2 hops)", + "I/O", + "Uncached", +}; + +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_LVL_NA; + u64 hit, miss; + + if (mem_info) + m = mem_info->data_src.mem_lvl; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + hit = m & PERF_MEM_LVL_HIT; + miss = m & PERF_MEM_LVL_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); + + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, mem_lvl[i]); + } + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + if (hit) + l += scnprintf(out + l, sz - l, " hit"); + if (miss) + l += scnprintf(out + l, sz - l, " miss"); + + return l; +} + +static const char * const snoop_access[] = { + "N/A", + 
"None", + "Miss", + "Hit", + "HitM", +}; + +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_SNOOP_NA; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_snoop; + + for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, snoop_access[i]); + } + + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + + return l; +} + +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + u64 mask = PERF_MEM_LOCK_NA; + int l; + + if (mem_info) + mask = mem_info->data_src.mem_lock; + + if (mask & PERF_MEM_LOCK_NA) + l = scnprintf(out, sz, "N/A"); + else if (mask & PERF_MEM_LOCK_LOCKED) + l = scnprintf(out, sz, "Yes"); + else + l = scnprintf(out, sz, "No"); + + return l; +} + +int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + int i = 0; + + i += perf_mem__lvl_scnprintf(out, sz, mem_info); + i += scnprintf(out + i, sz - i, "|SNP "); + i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|TLB "); + i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|LCK "); + i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); + + return i; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h new file mode 100644 index 000000000000..5d6d93066a6e --- /dev/null +++ b/tools/perf/util/mem-events.h @@ -0,0 +1,35 @@ +#ifndef __PERF_MEM_EVENTS_H +#define __PERF_MEM_EVENTS_H + +#include <stdbool.h> + +struct perf_mem_event { + bool record; + bool supported; + const char *tag; + const char *name; + const char *sysfs_name; +}; + +enum { + PERF_MEM_EVENTS__LOAD, + PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__MAX, +}; + +extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; + +int 
perf_mem_events__parse(const char *str); +int perf_mem_events__init(void); + +char *perf_mem_events__name(int i); + +struct mem_info; +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); + +int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); + +#endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 813d9b272c81..4c19d5e79d8c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -279,7 +279,24 @@ const char *event_type(int type) return "unknown"; } +static int parse_events__is_name_term(struct parse_events_term *term) +{ + return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; +} +static char *get_config_name(struct list_head *head_terms) +{ + struct parse_events_term *term; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, head_terms, list) + if (parse_events__is_name_term(term)) + return term->val.str; + + return NULL; +} static struct perf_evsel * __add_event(struct list_head *list, int *idx, @@ -333,11 +350,25 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES] return -1; } +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); +static int config_attr(struct perf_event_attr *attr, + struct list_head *head, + struct parse_events_error *err, + config_term_func_t config_term); + int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2) + char *type, char *op_result1, char 
*op_result2, + struct parse_events_error *err, + struct list_head *head_config) { struct perf_event_attr attr; - char name[MAX_NAME_LEN]; + LIST_HEAD(config_terms); + char name[MAX_NAME_LEN], *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n; @@ -351,6 +382,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, if (cache_type == -1) return -EINVAL; + config_name = get_config_name(head_config); n = snprintf(name, MAX_NAME_LEN, "%s", type); for (i = 0; (i < 2) && (op_result[i]); i++) { @@ -391,7 +423,16 @@ int parse_events_add_cache(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); attr.config = cache_type | (cache_op << 8) | (cache_result << 16); attr.type = PERF_TYPE_HW_CACHE; - return add_event(list, idx, &attr, name, NULL); + + if (head_config) { + if (config_attr(&attr, head_config, err, + config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + return add_event(list, idx, &attr, config_name ? 
: name, &config_terms); } static void tracepoint_error(struct parse_events_error *e, int err, @@ -540,6 +581,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, struct __add_bpf_event_param { struct parse_events_evlist *data; struct list_head *list; + struct list_head *head_config; }; static int add_bpf_event(struct probe_trace_event *tev, int fd, @@ -556,7 +598,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, tev->group, tev->event, fd); err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group, - tev->event, evlist->error, NULL); + tev->event, evlist->error, + param->head_config); if (err) { struct perf_evsel *evsel, *tmp; @@ -581,11 +624,12 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj) + struct bpf_object *obj, + struct list_head *head_config) { int err; char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {data, list}; + struct __add_bpf_event_param param = {data, list, head_config}; static bool registered_unprobe_atexit = false; if (IS_ERR(obj) || !obj) { @@ -631,17 +675,99 @@ errout: return err; } +static int +parse_events_config_bpf(struct parse_events_evlist *data, + struct bpf_object *obj, + struct list_head *head_config) +{ + struct parse_events_term *term; + int error_pos; + + if (!head_config || list_empty(head_config)) + return 0; + + list_for_each_entry(term, head_config, list) { + char errbuf[BUFSIZ]; + int err; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { + snprintf(errbuf, sizeof(errbuf), + "Invalid config term for BPF object"); + errbuf[BUFSIZ - 1] = '\0'; + + data->error->idx = term->err_term; + data->error->str = strdup(errbuf); + return -EINVAL; + } + + err = bpf__config_obj(obj, term, data->evlist, &error_pos); + if (err) { + bpf__strerror_config_obj(obj, term, data->evlist, + &error_pos, err, errbuf, + sizeof(errbuf)); + 
data->error->help = strdup( +"Hint:\tValid config terms:\n" +" \tmap:[<arraymap>].value<indices>=[value]\n" +" \tmap:[<eventmap>].event<indices>=[event]\n" +"\n" +" \twhere <indices> is something like [0,3...5] or [all]\n" +" \t(add -v to see detail)"); + data->error->str = strdup(errbuf); + if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) + data->error->idx = term->err_val; + else + data->error->idx = term->err_term + error_pos; + return err; + } + } + return 0; +} + +/* + * Split config terms: + * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ... + * 'call-graph=fp' is 'evt config', should be applied to each + * events in bpf.c. + * 'map:array.value[0]=1' is 'obj config', should be processed + * with parse_events_config_bpf. + * + * Move object config terms from the first list to obj_head_config. + */ +static void +split_bpf_config_terms(struct list_head *evt_head_config, + struct list_head *obj_head_config) +{ + struct parse_events_term *term, *temp; + + /* + * Currectly, all possible user config term + * belong to bpf object. parse_events__is_hardcoded_term() + * happends to be a good flag. + * + * See parse_events_config_bpf() and + * config_term_tracepoint(). 
+ */ + list_for_each_entry_safe(term, temp, evt_head_config, list) + if (!parse_events__is_hardcoded_term(term)) + list_move_tail(&term->list, obj_head_config); +} + int parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source) + bool source, + struct list_head *head_config) { + int err; struct bpf_object *obj; + LIST_HEAD(obj_head_config); + + if (head_config) + split_bpf_config_terms(head_config, &obj_head_config); obj = bpf__prepare_load(bpf_file_name, source); if (IS_ERR(obj)) { char errbuf[BUFSIZ]; - int err; err = PTR_ERR(obj); @@ -659,7 +785,18 @@ int parse_events_load_bpf(struct parse_events_evlist *data, return err; } - return parse_events_load_bpf_obj(data, list, obj); + err = parse_events_load_bpf_obj(data, list, obj, head_config); + if (err) + return err; + err = parse_events_config_bpf(data, obj, &obj_head_config); + + /* + * Caller doesn't know anything about obj_head_config, + * so combine them together again before returnning. 
+ */ + if (head_config) + list_splice_tail(&obj_head_config, head_config); + return err; } static int @@ -746,9 +883,59 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -typedef int config_term_func_t(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err); +/* + * Update according to parse-events.l + */ +static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { + [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>", + [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config", + [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", + [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", + [PARSE_EVENTS__TERM_TYPE_NAME] = "name", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", + [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type", + [PARSE_EVENTS__TERM_TYPE_TIME] = "time", + [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph", + [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", + [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", + [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", +}; + +static bool config_term_shrinked; + +static bool +config_term_avail(int term_type, struct parse_events_error *err) +{ + if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { + err->str = strdup("Invalid term_type"); + return false; + } + if (!config_term_shrinked) + return true; + + switch (term_type) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_NAME: + return true; + default: + if (!err) + return false; + + /* term_type is validated so indexing is safe */ + if (asprintf(&err->str, "'%s' is not usable in 'perf stat'", + config_term_names[term_type]) < 0) + err->str = NULL; + return false; + } +} + +void parse_events__shrink_config_terms(void) +{ + config_term_shrinked = true; +} static int config_term_common(struct perf_event_attr *attr, struct 
parse_events_term *term, @@ -815,6 +1002,17 @@ do { \ return -EINVAL; } + /* + * Check term availbility after basic checking so + * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. + * + * If check availbility at the entry of this function, + * user will see "'<sysfs term>' is not usable in 'perf stat'" + * if an invalid config term is provided for legacy events + * (for example, instructions/badterm/...), which is confusing. + */ + if (!config_term_avail(term->type_term, err)) + return -EINVAL; return 0; #undef CHECK_TYPE_VAL } @@ -961,23 +1159,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, return -ENOMEM; } - return add_event(list, &data->idx, &attr, NULL, &config_terms); -} - -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static char *pmu_event_name(struct list_head *head_terms) -{ - struct parse_events_term *term; - - list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) - return term->val.str; - - return NULL; + return add_event(list, &data->idx, &attr, + get_config_name(head_config), &config_terms); } int parse_events_add_pmu(struct parse_events_evlist *data, @@ -1024,7 +1207,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, return -EINVAL; evsel = __add_event(list, &data->idx, &attr, - pmu_event_name(head_config), pmu->cpus, + get_config_name(head_config), pmu->cpus, &config_terms); if (evsel) { evsel->unit = info.unit; @@ -1386,8 +1569,7 @@ int parse_events_terms(struct list_head *terms, const char *str) return 0; } - if (data.terms) - parse_events__free_terms(data.terms); + parse_events_terms__delete(data.terms); return ret; } @@ -1395,9 +1577,10 @@ int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err) { struct parse_events_evlist data = { - .list = LIST_HEAD_INIT(data.list), - .idx = evlist->nr_entries, - .error = err, + .list = LIST_HEAD_INIT(data.list), 
+ .idx = evlist->nr_entries, + .error = err, + .evlist = evlist, }; int ret; @@ -2068,12 +2251,29 @@ int parse_events_term__clone(struct parse_events_term **new, term->err_term, term->err_val); } -void parse_events__free_terms(struct list_head *terms) +void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; - list_for_each_entry_safe(term, h, terms, list) + list_for_each_entry_safe(term, h, terms, list) { + if (term->array.nr_ranges) + free(term->array.ranges); + list_del_init(&term->list); free(term); + } +} + +void parse_events_terms__delete(struct list_head *terms) +{ + if (!terms) + return; + parse_events_terms__purge(terms); + free(terms); +} + +void parse_events__clear_array(struct parse_events_array *a) +{ + free(a->ranges); } void parse_events_evlist_error(struct parse_events_evlist *data, @@ -2088,6 +2288,33 @@ void parse_events_evlist_error(struct parse_events_evlist *data, WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } +static void config_terms_list(char *buf, size_t buf_sz) +{ + int i; + bool first = true; + + buf[0] = '\0'; + for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { + const char *name = config_term_names[i]; + + if (!config_term_avail(i, NULL)) + continue; + if (!name) + continue; + if (name[0] == '<') + continue; + + if (strlen(buf) + strlen(name) + 2 >= buf_sz) + return; + + if (!first) + strcat(buf, ","); + else + first = false; + strcat(buf, name); + } +} + /* * Return string contains valid config terms of an event. * @additional_terms: For terms such as PMU sysfs terms. 
@@ -2095,17 +2322,18 @@ void parse_events_evlist_error(struct parse_events_evlist *data, char *parse_events_formats_error_string(char *additional_terms) { char *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + /* "branch_type" is the longest name */ + char static_terms[__PARSE_EVENTS__TERM_TYPE_NR * + (sizeof("branch_type") - 1)]; + config_terms_list(static_terms, sizeof(static_terms)); /* valid terms */ if (additional_terms) { - if (!asprintf(&str, "valid terms: %s,%s", - additional_terms, static_terms)) + if (asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms) < 0) goto fail; } else { - if (!asprintf(&str, "valid terms: %s", static_terms)) + if (asprintf(&str, "valid terms: %s", static_terms) < 0) goto fail; } return str; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f1a6db107241..d740c3ca9a1d 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -22,19 +22,18 @@ struct tracepoint_path { struct tracepoint_path *next; }; -extern struct tracepoint_path *tracepoint_id_to_path(u64 config); -extern struct tracepoint_path *tracepoint_name_to_path(const char *name); -extern bool have_tracepoints(struct list_head *evlist); +struct tracepoint_path *tracepoint_id_to_path(u64 config); +struct tracepoint_path *tracepoint_name_to_path(const char *name); +bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -extern int parse_events_option(const struct option *opt, const char *str, - int unset); -extern int parse_events(struct perf_evlist *evlist, const char *str, - struct parse_events_error *error); -extern int parse_events_terms(struct list_head *terms, const char *str); -extern int parse_filter(const struct option *opt, const char *str, int unset); -extern int exclude_perf(const struct option *opt, const char *arg, int unset); +int parse_events_option(const struct option 
*opt, const char *str, int unset); +int parse_events(struct perf_evlist *evlist, const char *str, + struct parse_events_error *error); +int parse_events_terms(struct list_head *terms, const char *str); +int parse_filter(const struct option *opt, const char *str, int unset); +int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -68,11 +67,21 @@ enum { PARSE_EVENTS__TERM_TYPE_CALLGRAPH, PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, - PARSE_EVENTS__TERM_TYPE_INHERIT + PARSE_EVENTS__TERM_TYPE_INHERIT, + __PARSE_EVENTS__TERM_TYPE_NR, +}; + +struct parse_events_array { + size_t nr_ranges; + struct { + unsigned int start; + size_t length; + } *ranges; }; struct parse_events_term { char *config; + struct parse_events_array array; union { char *str; u64 num; @@ -98,12 +107,14 @@ struct parse_events_evlist { int idx; int nr_groups; struct parse_events_error *error; + struct perf_evlist *evlist; }; struct parse_events_terms { struct list_head *terms; }; +void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num, @@ -115,7 +126,9 @@ int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); -void parse_events__free_terms(struct list_head *terms); +void parse_events_terms__delete(struct list_head *terms); +void parse_events_terms__purge(struct list_head *terms); +void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); @@ -126,18 +139,22 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, int 
parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source); + bool source, + struct list_head *head_config); /* Provide this function for perf test */ struct bpf_object; int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj); + struct bpf_object *obj, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2); + char *type, char *op_result1, char *op_result2, + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_evlist *data, @@ -165,7 +182,7 @@ void print_symbol_events(const char *event_glob, unsigned type, void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); -extern int is_valid_tracepoint(const char *event_string); +int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 58c5831ffd5c..1477fbc78993 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -9,8 +9,8 @@ %{ #include <errno.h> #include "../perf.h" -#include "parse-events-bison.h" #include "parse-events.h" +#include "parse-events-bison.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -111,6 +111,7 @@ do { \ %x mem %s config %x event +%x array group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* @@ -122,7 +123,7 @@ 
num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} @@ -176,10 +177,17 @@ modifier_bp [rwx]{1,3} } +<array>{ +"]" { BEGIN(config); return ']'; } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } +, { return ','; } +"\.\.\." { return PE_ARRAY_RANGE; } +} + <config>{ /* - * Please update parse_events_formats_error_string any time - * new static term is added. + * Please update config_term_names when new static term is added. */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } @@ -196,6 +204,8 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } +\[all\] { return PE_ARRAY_ALL; } +"[" { BEGIN(array); return '['; } } <mem>{ @@ -238,6 +248,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. 
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ad379968d4c1..5be4a5f216d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -28,7 +28,7 @@ do { \ INIT_LIST_HEAD(list); \ } while (0) -static inc_group_count(struct list_head *list, +static void inc_group_count(struct list_head *list, struct parse_events_evlist *data) { /* Count groups only have more than 1 members */ @@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list, %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_ARRAY_ALL PE_ARRAY_RANGE %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW @@ -64,6 +65,7 @@ static inc_group_count(struct list_head *list, %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT %type <num> value_sym %type <head> event_config +%type <head> opt_event_config %type <term> event_term %type <head> event_pmu %type <head> event_legacy_symbol @@ -82,6 +84,9 @@ static inc_group_count(struct list_head *list, %type <head> group_def %type <head> group %type <head> groups +%type <array> array +%type <array> array_term +%type <array> array_terms %union { @@ -93,6 +98,7 @@ static inc_group_count(struct list_head *list, char *sys; char *event; } tracepoint_name; + struct parse_events_array array; } %% @@ -211,24 +217,14 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events__free_terms($3); - $$ = list; -} -| -PE_NAME '/' '/' -{ - struct parse_events_evlist *data = _data; - struct list_head *list; - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, NULL)); + ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + parse_events_terms__delete($2); $$ = list; } | @@ -246,7 +242,7 
@@ PE_KERNEL_PMU_EVENT sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } | @@ -266,7 +262,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } @@ -285,7 +281,7 @@ value_sym '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = list; } | @@ -302,33 +298,39 @@ value_sym sep_slash_dc } event_legacy_cache: -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6)); + parse_events_terms__delete($6); $$ = list; } | -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4)); + parse_events_terms__delete($4); $$ = list; } | -PE_NAME_CACHE_TYPE +PE_NAME_CACHE_TYPE opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2)); + 
parse_events_terms__delete($2); $$ = list; } @@ -378,24 +380,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -tracepoint_name -{ - struct parse_events_evlist *data = _data; - struct parse_events_error *error = data->error; - struct list_head *list; - - ALLOC_LIST(list); - if (error) - error->idx = @1.first_column; - - if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, NULL)) - return -1; - - $$ = list; -} -| -tracepoint_name '/' event_config '/' +tracepoint_name opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; @@ -406,7 +391,7 @@ tracepoint_name '/' event_config '/' error->idx = @1.first_column; if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, $3)) + error, $2)) return -1; $$ = list; @@ -433,49 +418,68 @@ PE_NAME ':' PE_NAME } event_legacy_numeric: -PE_VALUE ':' PE_VALUE +PE_VALUE ':' PE_VALUE opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4)); + parse_events_terms__delete($4); $$ = list; } event_legacy_raw: -PE_RAW +PE_RAW opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2)); + parse_events_terms__delete($2); $$ = list; } event_bpf_file: -PE_BPF_OBJECT +PE_BPF_OBJECT opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, false)); + ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2)); + parse_events_terms__delete($2); $$ = list; } | -PE_BPF_SOURCE +PE_BPF_SOURCE opt_event_config { struct parse_events_evlist *data 
= _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, true)); + ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2)); + parse_events_terms__delete($2); $$ = list; } +opt_event_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} +| +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_terms *data = _data; @@ -573,6 +577,86 @@ PE_TERM ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); $$ = term; } +| +PE_NAME array '=' PE_NAME +{ + struct parse_events_term *term; + int i; + + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $4, &@1, &@4)); + + term->array = $2; + $$ = term; +} +| +PE_NAME array '=' PE_VALUE +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $4, &@1, &@4)); + term->array = $2; + $$ = term; +} + +array: +'[' array_terms ']' +{ + $$ = $2; +} +| +PE_ARRAY_ALL +{ + $$.nr_ranges = 0; + $$.ranges = NULL; +} + +array_terms: +array_terms ',' array_term +{ + struct parse_events_array new_array; + + new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges; + new_array.ranges = malloc(sizeof(new_array.ranges[0]) * + new_array.nr_ranges); + ABORT_ON(!new_array.ranges); + memcpy(&new_array.ranges[0], $1.ranges, + $1.nr_ranges * sizeof(new_array.ranges[0])); + memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, + $3.nr_ranges * sizeof(new_array.ranges[0])); + free($1.ranges); + free($3.ranges); + $$ = new_array; +} +| +array_term + +array_term: +PE_VALUE +{ + struct parse_events_array array; + + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + array.ranges[0].start = $1; + array.ranges[0].length = 1; + $$ = array; +} +| +PE_VALUE PE_ARRAY_RANGE PE_VALUE +{ + struct parse_events_array array; + + ABORT_ON($3 < $1); + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + 
array.ranges[0].start = $1; + array.ranges[0].length = $3 - $1 + 1; + $$ = array; +} sep_dc: ':' | diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 3654d964e49d..3bf6bf82ff2d 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -41,36 +41,6 @@ static char *cleanup_path(char *path) return path; } -static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) -{ - const char *perf_dir = get_perf_dir(); - size_t len; - - len = strlen(perf_dir); - if (n < len + 1) - goto bad; - memcpy(buf, perf_dir, len); - if (len && !is_dir_sep(perf_dir[len-1])) - buf[len++] = '/'; - len += vsnprintf(buf + len, n - len, fmt, args); - if (len >= n) - goto bad; - return cleanup_path(buf); -bad: - strlcpy(buf, bad_path, n); - return buf; -} - -char *perf_pathdup(const char *fmt, ...) -{ - char path[PATH_MAX]; - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(path, sizeof(path), fmt, args); - va_end(args); - return xstrdup(path); -} - char *mkpath(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b597bcc8fc78..adef23b1352e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * char scale[128]; int fd, ret = -1; char path[PATH_MAX]; - const char *lc; + char *lc; snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); @@ -124,6 +124,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * lc = setlocale(LC_NUMERIC, NULL); /* + * The lc string may be allocated in static storage, + * so get a dynamic copy to make it survive setlocale + * call below. + */ + lc = strdup(lc); + if (!lc) { + ret = -ENOMEM; + goto error; + } + + /* * force to C locale to ensure kernel * scale string is converted correctly. * kernel uses default C locale. 
@@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); + free(lc); + ret = 0; error: close(fd); @@ -153,7 +166,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n if (fd == -1) return -1; - sret = read(fd, alias->unit, UNIT_MAX_LEN); + sret = read(fd, alias->unit, UNIT_MAX_LEN); if (sret < 0) goto error; @@ -284,13 +297,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; - int ret = 0; event_dir = opendir(dir); if (!event_dir) return -EINVAL; - while (!ret && (evt_ent = readdir(event_dir))) { + while ((evt_ent = readdir(event_dir))) { char path[PATH_MAX]; char *name = evt_ent->d_name; FILE *file; @@ -306,17 +318,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) snprintf(path, PATH_MAX, "%s/%s", dir, name); - ret = -EINVAL; file = fopen(path, "r"); - if (!file) - break; + if (!file) { + pr_debug("Cannot open %s\n", path); + continue; + } - ret = perf_pmu__new_alias(head, dir, name, file); + if (perf_pmu__new_alias(head, dir, name, file) < 0) + pr_debug("Cannot set up %s\n", name); fclose(file); } closedir(event_dir); - return ret; + return 0; } /* @@ -354,7 +368,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, list_for_each_entry(term, &alias->terms, list) { ret = parse_events_term__clone(&cloned, term); if (ret) { - parse_events__free_terms(&list); + parse_events_terms__purge(&list); return ret; } list_add_tail(&cloned->list, &list); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 93996ec4bbe3..8319fbb08636 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2179,7 +2179,7 @@ static int perf_probe_event__sprintf(const char *group, const char *event, strbuf_addf(result, " in %s", module); if (pev->nargs > 0) { - strbuf_addstr(result, " with"); + strbuf_add(result, " with", 5); for (i = 
0; i < pev->nargs; i++) { ret = synthesize_perf_probe_arg(&pev->args[i], buf, 128); diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index ba926c30f8cd..e54e7b011577 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -114,49 +114,44 @@ int init_probe_symbol_maps(bool user_only); void exit_probe_symbol_maps(void); /* Command string to events */ -extern int parse_perf_probe_command(const char *cmd, - struct perf_probe_event *pev); -extern int parse_probe_trace_command(const char *cmd, - struct probe_trace_event *tev); +int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev); +int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); /* Events to command string */ -extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); -extern char *synthesize_probe_trace_command(struct probe_trace_event *tev); -extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, - size_t len); +char *synthesize_perf_probe_command(struct perf_probe_event *pev); +char *synthesize_probe_trace_command(struct probe_trace_event *tev); +int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len); /* Check the perf_probe_event needs debuginfo */ -extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); +bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); /* Release event contents */ -extern void clear_perf_probe_event(struct perf_probe_event *pev); -extern void clear_probe_trace_event(struct probe_trace_event *tev); +void clear_perf_probe_event(struct perf_probe_event *pev); +void clear_probe_trace_event(struct probe_trace_event *tev); /* Command string to line-range */ -extern int parse_line_range_desc(const char *cmd, struct line_range *lr); +int parse_line_range_desc(const char *cmd, struct line_range *lr); /* Release line range members */ -extern void line_range__clear(struct line_range *lr); +void 
line_range__clear(struct line_range *lr); /* Initialize line range */ -extern int line_range__init(struct line_range *lr); - -extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int del_perf_probe_events(struct strfilter *filter); - -extern int show_perf_probe_event(const char *group, const char *event, - struct perf_probe_event *pev, - const char *module, bool use_stdout); -extern int show_perf_probe_events(struct strfilter *filter); -extern int show_line_range(struct line_range *lr, const char *module, - bool user); -extern int show_available_vars(struct perf_probe_event *pevs, int npevs, - struct strfilter *filter); -extern int show_available_funcs(const char *module, struct strfilter *filter, - bool user); +int line_range__init(struct line_range *lr); + +int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int del_perf_probe_events(struct strfilter *filter); + +int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout); +int show_perf_probe_events(struct strfilter *filter); +int show_line_range(struct line_range *lr, const char *module, bool user); +int show_available_vars(struct perf_probe_event *pevs, int npevs, + struct strfilter *filter); +int show_available_funcs(const char *module, struct strfilter *filter, bool user); bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map); diff --git 
a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 4ce5c5e18f48..b3bd0fba0237 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1314,18 +1314,18 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) if (probe_conf.show_location_range) { if (!externs) { if (ret) - strbuf_addf(&buf, "[INV]\t"); + strbuf_add(&buf, "[INV]\t", 6); else - strbuf_addf(&buf, "[VAL]\t"); + strbuf_add(&buf, "[VAL]\t", 6); } else - strbuf_addf(&buf, "[EXT]\t"); + strbuf_add(&buf, "[EXT]\t", 6); } ret2 = die_get_varname(die_mem, &buf); if (!ret2 && probe_conf.show_location_range && !externs) { - strbuf_addf(&buf, "\t"); + strbuf_addch(&buf, '\t'); ret2 = die_get_var_range(&af->pf.sp_die, die_mem, &buf); } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 0aec7704e395..51137fccb9c8 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -34,27 +34,25 @@ struct debuginfo { }; /* This also tries to open distro debuginfo */ -extern struct debuginfo *debuginfo__new(const char *path); -extern void debuginfo__delete(struct debuginfo *dbg); +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -extern int debuginfo__find_trace_events(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct probe_trace_event **tevs); +int debuginfo__find_trace_events(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ -extern int debuginfo__find_probe_point(struct debuginfo *dbg, - unsigned long addr, - struct perf_probe_point *ppt); +int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, + struct perf_probe_point *ppt); /* Find a line range */ -extern int debuginfo__find_line_range(struct debuginfo *dbg, - struct line_range *lr); +int 
debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); /* Find available variables */ -extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct variable_list **vls); +int debuginfo__find_available_vars_at(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct variable_list **vls); /* Find a src file from a DWARF tag path */ int get_real_path(const char *raw_path, const char *comp_dir, diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 172889ea234f..3340c9c4a6ca 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -24,6 +24,6 @@ * sq_quote() in a real application. */ -extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); +void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); #endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 544509c159ce..b3aabc0d4eb0 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d72fafc1c800..fbd05242b4e5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -205,6 +205,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; @@ -1091,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); - 
set_table_handlers(tables); if (tables->db_export_mode) { @@ -1101,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } + free(command_line); + return err; error: Py_Finalize(); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 40b7a0d0905b..4abd85c6346d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -240,14 +240,6 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_build_id_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -260,23 +252,6 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); -static int process_id_index_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int skipn(int fd, off_t n) { char buf[4096]; @@ -303,10 +278,9 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, return event->auxtrace.size; } -static -int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) { dump_printf(": 
unhandled!\n"); return 0; @@ -410,7 +384,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_build_id_stub; + tool->build_id = process_event_op2_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -418,13 +392,13 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } if (tool->id_index == NULL) - tool->id_index = process_id_index_stub; + tool->id_index = process_event_op2_stub; if (tool->auxtrace_info == NULL) - tool->auxtrace_info = process_event_auxtrace_info_stub; + tool->auxtrace_info = process_event_op2_stub; if (tool->auxtrace == NULL) tool->auxtrace = process_event_auxtrace_stub; if (tool->auxtrace_error == NULL) - tool->auxtrace_error = process_event_auxtrace_error_stub; + tool->auxtrace_error = process_event_op2_stub; if (tool->thread_map == NULL) tool->thread_map = process_event_thread_map_stub; if (tool->cpu_map == NULL) @@ -1133,12 +1107,11 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { - const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct machine *machine; if (perf_guest && - ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || - (cpumode == PERF_RECORD_MISC_GUEST_USER))) { + ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || + (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; if (event->header.type == PERF_RECORD_MMAP diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', 
'-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ec722346e6ff..47966a1618c7 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -6,6 +6,7 @@ #include "evsel.h" #include "evlist.h" #include <traceevent/event-parse.h> +#include "mem-events.h" regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; @@ -25,9 +26,19 @@ int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; +int sort__has_thread = 0; +int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; - +/* + * Replaces all occurrences of a char used with the: + * + * -t, --field-separator + * + * option, that uses a special separator character and don't pad with spaces, + * replacing all occurances of this separator in symbol names (and other + * output) with a '.' character, that thus it's the only non valid separator. +*/ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) 
{ int n; @@ -80,10 +91,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, width, width, comm ?: ""); } +static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct thread *th = arg; + + if (type != HIST_FILTER__THREAD) + return -1; + + return th && he->thread != th; +} + struct sort_entry sort_thread = { .se_header = " Pid:Command", .se_cmp = sort__thread_cmp, .se_snprintf = hist_entry__thread_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_THREAD, }; @@ -121,6 +143,7 @@ struct sort_entry sort_comm = { .se_collapse = sort__comm_collapse, .se_sort = sort__comm_sort, .se_snprintf = hist_entry__comm_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_COMM, }; @@ -170,10 +193,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf, return _hist_entry__dso_snprintf(he->ms.map, bf, size, width); } +static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->ms.map || he->ms.map->dso != dso); +} + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, .se_snprintf = hist_entry__dso_snprintf, + .se_filter = hist_entry__dso_filter, .se_width_idx = HISTC_DSO, }; @@ -246,10 +280,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } else { - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + ret += repsep_snprintf(bf + ret, size - ret, "%.*s", width - ret, sym->name); } @@ -257,14 +289,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, size_t len = BITS_PER_LONG / 4; ret += repsep_snprintf(bf + ret, 
size - ret, "%-#.*llx", len, ip); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } - if (ret > width) - bf[width] = '\0'; - - return width; + return ret; } static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, @@ -274,46 +301,56 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, he->level, bf, size, width); } +static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym)); +} + struct sort_entry sort_sym = { .se_header = "Symbol", .se_cmp = sort__sym_cmp, .se_sort = sort__sym_sort, .se_snprintf = hist_entry__sym_snprintf, + .se_filter = hist_entry__sym_filter, .se_width_idx = HISTC_SYMBOL, }; /* --sort srcline */ +static char *hist_entry__get_srcline(struct hist_entry *he) +{ + struct map *map = he->ms.map; + + if (!map) + return SRCLINE_UNKNOWN; + + return get_srcline(map->dso, map__rip_2objdump(map, he->ip), + he->ms.sym, true); +} + static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcline) { - if (!left->ms.map) - left->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = left->ms.map; - left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip), - left->ms.sym, true); - } - } - if (!right->srcline) { - if (!right->ms.map) - right->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = right->ms.map; - right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip), - right->ms.sym, true); - } - } + if (!left->srcline) + left->srcline = hist_entry__get_srcline(left); + if (!right->srcline) + right->srcline = hist_entry__get_srcline(right); + return strcmp(right->srcline, left->srcline); } static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*.*s", width, width, 
he->srcline); + if (!he->srcline) + he->srcline = hist_entry__get_srcline(he); + + return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); } struct sort_entry sort_srcline = { @@ -327,11 +364,14 @@ struct sort_entry sort_srcline = { static char no_srcfile[1]; -static char *get_srcfile(struct hist_entry *e) +static char *hist_entry__get_srcfile(struct hist_entry *e) { char *sf, *p; struct map *map = e->ms.map; + if (!map) + return no_srcfile; + sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), e->ms.sym, false, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) @@ -348,25 +388,21 @@ static char *get_srcfile(struct hist_entry *e) static int64_t sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcfile) { - if (!left->ms.map) - left->srcfile = no_srcfile; - else - left->srcfile = get_srcfile(left); - } - if (!right->srcfile) { - if (!right->ms.map) - right->srcfile = no_srcfile; - else - right->srcfile = get_srcfile(right); - } + if (!left->srcfile) + left->srcfile = hist_entry__get_srcfile(left); + if (!right->srcfile) + right->srcfile = hist_entry__get_srcfile(right); + return strcmp(right->srcfile, left->srcfile); } static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); + if (!he->srcfile) + he->srcfile = hist_entry__get_srcfile(he); + + return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile); } struct sort_entry sort_srcfile = { @@ -439,10 +475,21 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket); } +static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg) +{ + int sk = *(const int *)arg; + + if (type != HIST_FILTER__SOCKET) + return -1; + + return sk >= 0 && he->socket != sk; +} + struct sort_entry sort_socket = { .se_header = "Socket", .se_cmp = sort__socket_cmp, 
.se_snprintf = hist_entry__socket_snprintf, + .se_filter = hist_entry__socket_filter, .se_width_idx = HISTC_SOCKET, }; @@ -483,9 +530,6 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry *right) if (right->trace_output == NULL) right->trace_output = get_trace_output(right); - hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output)); - hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output)); - return strcmp(right->trace_output, left->trace_output); } @@ -496,11 +540,11 @@ static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf, evsel = hists_to_evsel(he->hists); if (evsel->attr.type != PERF_TYPE_TRACEPOINT) - return scnprintf(bf, size, "%-*.*s", width, width, "N/A"); + return scnprintf(bf, size, "%-.*s", width, "N/A"); if (he->trace_output == NULL) he->trace_output = get_trace_output(he); - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output); + return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output); } struct sort_entry sort_trace = { @@ -532,6 +576,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_from_filter(struct hist_entry *he, int type, + const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->from.map || + he->branch_info->from.map->dso != dso); +} + static int64_t sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -552,6 +608,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->to.map || + 
he->branch_info->to.map->dso != dso); +} + static int64_t sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -613,10 +681,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__sym_from_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->from.sym && + strstr(he->branch_info->from.sym->name, sym)); +} + +static int hist_entry__sym_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->to.sym && + strstr(he->branch_info->to.sym->name, sym)); +} + struct sort_entry sort_dso_from = { .se_header = "Source Shared Object", .se_cmp = sort__dso_from_cmp, .se_snprintf = hist_entry__dso_from_snprintf, + .se_filter = hist_entry__dso_from_filter, .se_width_idx = HISTC_DSO_FROM, }; @@ -624,6 +717,7 @@ struct sort_entry sort_dso_to = { .se_header = "Target Shared Object", .se_cmp = sort__dso_to_cmp, .se_snprintf = hist_entry__dso_to_snprintf, + .se_filter = hist_entry__dso_to_filter, .se_width_idx = HISTC_DSO_TO, }; @@ -631,6 +725,7 @@ struct sort_entry sort_sym_from = { .se_header = "Source Symbol", .se_cmp = sort__sym_from_cmp, .se_snprintf = hist_entry__sym_from_snprintf, + .se_filter = hist_entry__sym_from_filter, .se_width_idx = HISTC_SYMBOL_FROM, }; @@ -638,6 +733,7 @@ struct sort_entry sort_sym_to = { .se_header = "Target Symbol", .se_cmp = sort__sym_to_cmp, .se_snprintf = hist_entry__sym_to_snprintf, + .se_filter = hist_entry__sym_to_filter, .se_width_idx = HISTC_SYMBOL_TO, }; @@ -797,20 +893,10 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned 
int width) { - const char *out; - u64 mask = PERF_MEM_LOCK_NA; + char out[10]; - if (he->mem_info) - mask = he->mem_info->data_src.mem_lock; - - if (mask & PERF_MEM_LOCK_NA) - out = "N/A"; - else if (mask & PERF_MEM_LOCK_LOCKED) - out = "Yes"; - else - out = "No"; - - return repsep_snprintf(bf, size, "%-*s", width, out); + perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info); + return repsep_snprintf(bf, size, "%.*s", width, out); } static int64_t @@ -832,54 +918,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb); } -static const char * const tlb_access[] = { - "N/A", - "HIT", - "MISS", - "L1", - "L2", - "Walker", - "Fault", -}; -#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *)) - static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t l = 0, i; - u64 m = PERF_MEM_TLB_NA; - u64 hit, miss; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_dtlb; - - hit = m & PERF_MEM_TLB_HIT; - miss = m & PERF_MEM_TLB_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); - - for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, tlb_access[i], sz - l); - l += strlen(tlb_access[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } @@ -902,61 +946,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl); } -static const char * const mem_lvl[] = { - "N/A", - "HIT", - "MISS", - "L1", - "LFB", - "L2", - "L3", - "Local RAM", - "Remote RAM (1 hop)", - 
"Remote RAM (2 hops)", - "Remote Cache (1 hop)", - "Remote Cache (2 hops)", - "I/O", - "Uncached", -}; -#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *)) - static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_LVL_NA; - u64 hit, miss; - - if (he->mem_info) - m = he->mem_info->data_src.mem_lvl; - - out[0] = '\0'; - - hit = m & PERF_MEM_LVL_HIT; - miss = m & PERF_MEM_LVL_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - - for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, mem_lvl[i], sz - l); - l += strlen(mem_lvl[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } @@ -979,51 +974,15 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop); } -static const char * const snoop_access[] = { - "N/A", - "None", - "Miss", - "Hit", - "HitM", -}; -#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *)) - static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_SNOOP_NA; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_snoop; - - for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, snoop_access[i], sz - l); - l += strlen(snoop_access[i]); - } - - if (*out == '\0') - strcpy(out, "N/A"); + perf_mem__snp_scnprintf(out, 
sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } -static inline u64 cl_address(u64 address) -{ - /* return the cacheline of the address */ - return (address & ~(cacheline_size - 1)); -} - static int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1440,20 +1399,6 @@ struct hpp_sort_entry { struct sort_entry *se; }; -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) -{ - struct hpp_sort_entry *hse_a; - struct hpp_sort_entry *hse_b; - - if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) - return false; - - hse_a = container_of(a, struct hpp_sort_entry, hpp); - hse_b = container_of(b, struct hpp_sort_entry, hpp); - - return hse_a->se == hse_b->se; -} - void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) { struct hpp_sort_entry *hse; @@ -1539,8 +1484,56 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, return sort_fn(a, b); } +bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +{ + return format->header == __sort__hpp_header; +} + +#define MK_SORT_ENTRY_CHK(key) \ +bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ +{ \ + struct hpp_sort_entry *hse; \ + \ + if (!perf_hpp__is_sort_entry(fmt)) \ + return false; \ + \ + hse = container_of(fmt, struct hpp_sort_entry, hpp); \ + return hse->se == &sort_ ## key ; \ +} + +MK_SORT_ENTRY_CHK(trace) +MK_SORT_ENTRY_CHK(srcline) +MK_SORT_ENTRY_CHK(srcfile) +MK_SORT_ENTRY_CHK(thread) +MK_SORT_ENTRY_CHK(comm) +MK_SORT_ENTRY_CHK(dso) +MK_SORT_ENTRY_CHK(sym) + + +static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_sort_entry *hse_a; + struct hpp_sort_entry *hse_b; + + if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) + return false; + + hse_a = container_of(a, struct hpp_sort_entry, hpp); + hse_b = container_of(b, struct hpp_sort_entry, hpp); + + return hse_a->se == hse_b->se; +} + +static void hse_free(struct 
perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + free(hse); +} + static struct hpp_sort_entry * -__sort_dimension__alloc_hpp(struct sort_dimension *sd) +__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level) { struct hpp_sort_entry *hse; @@ -1560,40 +1553,92 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.cmp = __sort__hpp_cmp; hse->hpp.collapse = __sort__hpp_collapse; hse->hpp.sort = __sort__hpp_sort; + hse->hpp.equal = __sort__hpp_equal; + hse->hpp.free = hse_free; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); hse->hpp.elide = false; hse->hpp.len = 0; hse->hpp.user_len = 0; + hse->hpp.level = level; return hse; } -bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +static void hpp_free(struct perf_hpp_fmt *fmt) { - return format->header == __sort__hpp_header; + free(fmt); +} + +static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd, + int level) +{ + struct perf_hpp_fmt *fmt; + + fmt = memdup(hd->fmt, sizeof(*fmt)); + if (fmt) { + INIT_LIST_HEAD(&fmt->list); + INIT_LIST_HEAD(&fmt->sort_list); + fmt->free = hpp_free; + fmt->level = level; + } + + return fmt; +} + +int hist_entry__filter(struct hist_entry *he, int type, const void *arg) +{ + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; + int ret = -1; + int r; + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_filter == NULL) + continue; + + /* + * hist entry is filtered if any of sort key in the hpp list + * is applied. But it should skip non-matched filter types. 
+ */ + r = hse->se->se_filter(he, type, arg); + if (r >= 0) { + if (ret < 0) + ret = 0; + ret |= r; + } + } + + return ret; } -static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; - perf_hpp__register_sort_field(&hse->hpp); + perf_hpp_list__register_sort_field(list, &hse->hpp); return 0; } -static int __sort_dimension__add_hpp_output(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, + struct perf_hpp_list *list) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); if (hse == NULL) return -1; - perf_hpp__column_register(&hse->hpp); + perf_hpp_list__column_register(list, &hse->hpp); return 0; } @@ -1727,6 +1772,9 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, if (hde->raw_trace) goto raw_field; + if (!he->trace_output) + he->trace_output = get_trace_output(he); + field = hde->field; namelen = strlen(field->name); str = he->trace_output; @@ -1776,6 +1824,11 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + if (b == NULL) { + update_dynamic_len(hde, a); + return 0; + } + field = hde->field; if (field->flags & FIELD_IS_DYNAMIC) { unsigned long long dyn; @@ -1790,9 +1843,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, } else { offset = field->offset; size = field->size; - - update_dynamic_len(hde, a); - update_dynamic_len(hde, b); } return memcmp(a->raw_data + offset, b->raw_data + offset, size); @@ -1803,8 +1853,31 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt) return fmt->cmp == __sort__hde_cmp; } +static bool 
__sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_dynamic_entry *hde_a; + struct hpp_dynamic_entry *hde_b; + + if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b)) + return false; + + hde_a = container_of(a, struct hpp_dynamic_entry, hpp); + hde_b = container_of(b, struct hpp_dynamic_entry, hpp); + + return hde_a->field == hde_b->field; +} + +static void hde_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_dynamic_entry *hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + free(hde); +} + static struct hpp_dynamic_entry * -__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) +__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, + int level) { struct hpp_dynamic_entry *hde; @@ -1827,16 +1900,47 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) hde->hpp.cmp = __sort__hde_cmp; hde->hpp.collapse = __sort__hde_cmp; hde->hpp.sort = __sort__hde_cmp; + hde->hpp.equal = __sort__hde_equal; + hde->hpp.free = hde_free; INIT_LIST_HEAD(&hde->hpp.list); INIT_LIST_HEAD(&hde->hpp.sort_list); hde->hpp.elide = false; hde->hpp.len = 0; hde->hpp.user_len = 0; + hde->hpp.level = level; return hde; } +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_fmt *new_fmt = NULL; + + if (perf_hpp__is_sort_entry(fmt)) { + struct hpp_sort_entry *hse, *new_hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + new_hse = memdup(hse, sizeof(*hse)); + if (new_hse) + new_fmt = &new_hse->hpp; + } else if (perf_hpp__is_dynamic_entry(fmt)) { + struct hpp_dynamic_entry *hde, *new_hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + new_hde = memdup(hde, sizeof(*hde)); + if (new_hde) + new_fmt = &new_hde->hpp; + } else { + new_fmt = memdup(fmt, sizeof(*fmt)); + } + + INIT_LIST_HEAD(&new_fmt->list); + INIT_LIST_HEAD(&new_fmt->sort_list); + + return new_fmt; +} + static int parse_field_name(char *str, char **event, 
char **field, char **opt) { char *event_name, *field_name, *opt_name; @@ -1908,11 +2012,11 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam static int __dynamic_dimension__add(struct perf_evsel *evsel, struct format_field *field, - bool raw_trace) + bool raw_trace, int level) { struct hpp_dynamic_entry *hde; - hde = __alloc_dynamic_entry(evsel, field); + hde = __alloc_dynamic_entry(evsel, field, level); if (hde == NULL) return -ENOMEM; @@ -1922,14 +2026,14 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel, return 0; } -static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace) +static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level) { int ret; struct format_field *field; field = evsel->tp_format->format.fields; while (field) { - ret = __dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) return ret; @@ -1938,7 +2042,8 @@ static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace) return 0; } -static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) +static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace, + int level) { int ret; struct perf_evsel *evsel; @@ -1947,7 +2052,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; - ret = add_evsel_fields(evsel, raw_trace); + ret = add_evsel_fields(evsel, raw_trace, level); if (ret < 0) return ret; } @@ -1955,7 +2060,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) } static int add_all_matching_fields(struct perf_evlist *evlist, - char *field_name, bool raw_trace) + char *field_name, bool raw_trace, int level) { int ret = -ESRCH; struct perf_evsel *evsel; @@ -1969,14 +2074,15 @@ static int add_all_matching_fields(struct perf_evlist *evlist, if (field == NULL) continue; - ret = 
__dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) break; } return ret; } -static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) +static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, + int level) { char *str, *event_name, *field_name, *opt_name; struct perf_evsel *evsel; @@ -2006,12 +2112,12 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) } if (!strcmp(field_name, "trace_fields")) { - ret = add_all_dynamic_fields(evlist, raw_trace); + ret = add_all_dynamic_fields(evlist, raw_trace, level); goto out; } if (event_name == NULL) { - ret = add_all_matching_fields(evlist, field_name, raw_trace); + ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } @@ -2029,7 +2135,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) } if (!strcmp(field_name, "*")) { - ret = add_evsel_fields(evsel, raw_trace); + ret = add_evsel_fields(evsel, raw_trace, level); } else { field = pevent_find_any_field(evsel->tp_format, field_name); if (field == NULL) { @@ -2038,7 +2144,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) return -ENOENT; } - ret = __dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); } out: @@ -2046,12 +2152,14 @@ out: return ret; } -static int __sort_dimension__add(struct sort_dimension *sd) +static int __sort_dimension__add(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_sort(sd) < 0) + if (__sort_dimension__add_hpp_sort(sd, list, level) < 0) return -1; if (sd->entry->se_collapse) @@ -2062,46 +2170,63 @@ static int __sort_dimension__add(struct sort_dimension *sd) return 0; } -static int __hpp_dimension__add(struct hpp_dimension *hd) +static int __hpp_dimension__add(struct hpp_dimension *hd, + struct 
perf_hpp_list *list, + int level) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__register_sort_field(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd, level); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp_list__register_sort_field(list, fmt); return 0; } -static int __sort_dimension__add_output(struct sort_dimension *sd) +static int __sort_dimension__add_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd) < 0) + if (__sort_dimension__add_hpp_output(sd, list) < 0) return -1; sd->taken = 1; return 0; } -static int __hpp_dimension__add_output(struct hpp_dimension *hd) +static int __hpp_dimension__add_output(struct perf_hpp_list *list, + struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__column_register(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd, 0); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp_list__column_register(list, fmt); return 0; } int hpp_dimension__add_output(unsigned col) { BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&hpp_sort_dimensions[col]); + return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(const char *tok, - struct perf_evlist *evlist __maybe_unused) +static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist, + int level) { unsigned int i; @@ -2136,9 +2261,13 @@ static int sort_dimension__add(const char *tok, sort__has_dso = 1; } else if (sd->entry == &sort_socket) { sort__has_socket = 1; + } else if (sd->entry == &sort_thread) { + sort__has_thread = 1; + } else if (sd->entry == &sort_comm) { + sort__has_comm = 1; } - return __sort_dimension__add(sd); + return __sort_dimension__add(sd, list, level); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ 
-2147,7 +2276,7 @@ static int sort_dimension__add(const char *tok, if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add(hd); + return __hpp_dimension__add(hd, list, level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2162,7 +2291,7 @@ static int sort_dimension__add(const char *tok, if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, list, level); return 0; } @@ -2178,16 +2307,60 @@ static int sort_dimension__add(const char *tok, if (sd->entry == &sort_mem_daddr_sym) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, list, level); return 0; } - if (!add_dynamic_entry(evlist, tok)) + if (!add_dynamic_entry(evlist, tok, level)) return 0; return -ESRCH; } +static int setup_sort_list(struct perf_hpp_list *list, char *str, + struct perf_evlist *evlist) +{ + char *tmp, *tok; + int ret = 0; + int level = 0; + int next_level = 1; + bool in_group = false; + + do { + tok = str; + tmp = strpbrk(str, "{}, "); + if (tmp) { + if (in_group) + next_level = level; + else + next_level = level + 1; + + if (*tmp == '{') + in_group = true; + else if (*tmp == '}') + in_group = false; + + *tmp = '\0'; + str = tmp + 1; + } + + if (*tok) { + ret = sort_dimension__add(list, tok, evlist, level); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --sort key: `%s'", tok); + break; + } + } + + level = next_level; + } while (tmp); + + return ret; +} + static const char *get_default_sort_order(struct perf_evlist *evlist) { const char *default_sort_orders[] = { @@ -2282,7 +2455,7 @@ static char *setup_overhead(char *keys) static int __setup_sorting(struct perf_evlist *evlist) { - char *tmp, *tok, *str; + char *str; const char *sort_keys; int ret = 0; @@ -2320,17 +2493,7 @@ static int __setup_sorting(struct perf_evlist *evlist) } } - for (tok = strtok_r(str, ", ", 
&tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok, evlist); - if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); - break; - } - } + ret = setup_sort_list(&perf_hpp_list, str, evlist); free(str); return ret; @@ -2341,7 +2504,7 @@ void perf_hpp__set_elide(int idx, bool elide) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2401,7 +2564,7 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2413,7 +2576,7 @@ void sort__setup_elide(FILE *output) * It makes no sense to elide all of sort entries. * Just revert them to show up again. */ - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2421,7 +2584,7 @@ void sort__setup_elide(FILE *output) return; } - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2429,7 +2592,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(char *tok) +static int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; @@ -2439,7 +2602,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2448,7 +2611,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add_output(hd); + return __hpp_dimension__add_output(list, hd); } for (i 
= 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2457,7 +2620,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -2466,12 +2629,32 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } return -ESRCH; } +static int setup_output_list(struct perf_hpp_list *list, char *str) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = output_field_add(list, tok); + if (ret == -EINVAL) { + error("Invalid --fields key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --fields key: `%s'", tok); + break; + } + } + + return ret; +} + static void reset_dimensions(void) { unsigned int i; @@ -2496,7 +2679,7 @@ bool is_strict_order(const char *order) static int __setup_output_field(void) { - char *tmp, *tok, *str, *strp; + char *str, *strp; int ret = -EINVAL; if (field_order == NULL) @@ -2516,17 +2699,7 @@ static int __setup_output_field(void) goto out; } - for (tok = strtok_r(strp, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(tok); - if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); - break; - } - } + ret = setup_output_list(&perf_hpp_list, strp); out: free(str); @@ -2542,7 +2715,7 @@ int setup_sorting(struct perf_evlist *evlist) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); if (err < 0) return err; } @@ -2560,9 +2733,13 @@ int setup_sorting(struct perf_evlist *evlist) return err; /* copy 
sort keys to output fields */ - perf_hpp__setup_output_field(); + perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ - perf_hpp__append_sort_keys(); + perf_hpp__append_sort_keys(&perf_hpp_list); + + /* setup hists-specific output fields */ + if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0) + return -1; return 0; } @@ -2578,5 +2755,5 @@ void reset_output_field(void) sort_order = NULL; reset_dimensions(); - perf_hpp__reset_output_field(); + perf_hpp__reset_output_field(&perf_hpp_list); } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 687bbb124428..3f4e35998119 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,9 +32,12 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__need_collapse; +extern int sort__has_dso; extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; +extern int sort__has_thread; +extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; @@ -94,9 +97,11 @@ struct hist_entry { s32 socket; s32 cpu; u8 cpumode; + u8 depth; /* We are added by hists__add_dummy_entry. 
*/ bool dummy; + bool leaf; char level; u8 filtered; @@ -113,18 +118,28 @@ struct hist_entry { bool init_have_children; bool unfolded; bool has_children; + bool has_no_entry; }; }; char *srcline; char *srcfile; struct symbol *parent; - struct rb_root sorted_chain; struct branch_info *branch_info; struct hists *hists; struct mem_info *mem_info; void *raw_data; u32 raw_size; void *trace_output; + struct perf_hpp_list *hpp_list; + struct hist_entry *parent_he; + union { + /* this is for hierarchical entry structure */ + struct { + struct rb_root hroot_in; + struct rb_root hroot_out; + }; /* non-leaf entries */ + struct rb_root sorted_chain; /* leaf entry has callchains */ + }; struct callchain_root callchain[0]; /* must be last member */ }; @@ -160,6 +175,17 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he) return period * 100.0 / total_period; } +static inline u64 cl_address(u64 address) +{ + /* return the cacheline of the address */ + return (address & ~(cacheline_size - 1)); +} + +static inline u64 cl_offset(u64 address) +{ + /* return the cacheline of the address */ + return (address & (cacheline_size - 1)); +} enum sort_mode { SORT_MODE__NORMAL, @@ -221,6 +247,7 @@ struct sort_entry { int64_t (*se_sort)(struct hist_entry *, struct hist_entry *); int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); + int (*se_filter)(struct hist_entry *he, int type, const void *arg); u8 se_width_idx; }; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ac03146889d..fdb71961143e 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "stat.h" #include "color.h" +#include "pmu.h" enum { CTX_BIT_USER = 1 << 0, @@ -14,6 +15,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not 
supported? + */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; @@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +void perf_stat__init_shadow_stats(void) +{ + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); +} + static int evsel_context(struct perf_evsel *evsel) { int ctx = 0; @@ -137,9 +151,9 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(FILE *out, int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) +static void print_stalled_cycles_frontend(int cpu, + struct perf_evsel *evsel, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -152,14 +166,16 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); + if (ratio) + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + ratio); + else + out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(FILE *out, int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) +static void print_stalled_cycles_backend(int cpu, + struct perf_evsel *evsel, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -172,14 +188,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu, color = 
get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_branch_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -192,14 +207,13 @@ static void print_branch_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_l1_dcache_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -212,14 +226,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_l1_icache_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -231,15 +244,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); + 
out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_dtlb_cache_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -251,15 +262,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_itlb_cache_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -271,15 +280,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) +static void print_ll_cache_misses(int cpu, + struct perf_evsel *evsel, + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -291,15 +298,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } 
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out) { + void *ctxp = out->ctx; + print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -307,119 +314,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "insn per cycle", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); if (total && avg) { + out->new_line(ctxp); ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "stalled cycles per insn", + ratio); + } else if (have_frontend_stalled) { + print_metric(ctxp, NULL, NULL, + "stalled cycles per insn", 0); } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + if (runtime_branches_stats[ctx][cpu].n != 0) + print_branch_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 
16))) { + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) + print_l1_dcache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_icache_stats[ctx][cpu].n != 0) + print_l1_icache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) + print_itlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, 
HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_ll_cache_stats[ctx][cpu].n != 0) + print_ll_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); if (total) ratio = avg * 100 / total; - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - + if (runtime_cacherefs_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.3f %%", + "of all cache refs", ratio); + else + print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); + print_stalled_cycles_frontend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); + print_stalled_cycles_backend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); + print_metric(ctxp, NULL, + "%7.2f%%", "transactional cycles", + 100.0 * (avg / total)); + else + print_metric(ctxp, NULL, NULL, "transactional cycles", + 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) total2 = avg; if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", + 
print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + else + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.0f", + "cycles / transaction", ratio); + else + print_metric(ctxp, NULL, NULL, "cycles / transaction", + 0); + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / elision ", ratio); + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - fprintf(out, " # %8.3f CPUs utilized ", avg / ratio); + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + avg / ratio); else - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; + char unit_buf[10]; total = avg_stats(&runtime_nsecs_stats[cpu]); @@ -429,9 +462,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, ratio *= 1000; unit = 'K'; } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, NULL, 0); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index afb0c45eba34..4d9b481cf3b6 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c 
@@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->priv; @@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) perf_stat_evsel_id_init(evsel); } -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) @@ -117,13 +117,13 @@ int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) return 0; } -void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { zfree(&evsel->priv); } -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) { struct perf_counts *counts; @@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, return counts ? 
0 : -ENOMEM; } -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) { perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); int nthreads = thread_map__nr(evsel->threads); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 086f4e128d63..0150e786ccc7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -68,21 +68,23 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); extern struct stats walltime_nsecs_stats; +typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, + const char *fmt, double val); +typedef void (*new_line_t )(void *ctx); + +void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr); - -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); -void perf_evsel__free_stat_priv(struct perf_evsel *evsel); - -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads); -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); +struct perf_stat_output_ctx { + void *ctx; + print_metric_t print_metric; + new_line_t new_line; +}; -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); diff --git a/tools/perf/util/strbuf.c 
b/tools/perf/util/strbuf.c index 25671fa16618..8fb73295ec34 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -51,28 +51,11 @@ void strbuf_grow(struct strbuf *sb, size_t extra) ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); } -static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, - const void *data, size_t dlen) +void strbuf_addch(struct strbuf *sb, int c) { - if (pos + len < pos) - die("you want to use way too much memory"); - if (pos > sb->len) - die("`pos' is too far after the end of the buffer"); - if (pos + len > sb->len) - die("`pos + len' is too far after the end of the buffer"); - - if (dlen >= len) - strbuf_grow(sb, dlen - len); - memmove(sb->buf + pos + dlen, - sb->buf + pos + len, - sb->len - pos - len); - memcpy(sb->buf + pos, data, dlen); - strbuf_setlen(sb, sb->len + dlen - len); -} - -void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_splice(sb, pos, len, NULL, 0); + strbuf_grow(sb, 1); + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; } void strbuf_add(struct strbuf *sb, const void *data, size_t len) @@ -82,7 +65,7 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } -void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) +static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { int len; va_list ap_saved; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 529f2f035249..ab9be0fbbd40 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -51,16 +51,16 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *buf, ssize_t hint); -extern void strbuf_release(struct strbuf *); -extern char *strbuf_detach(struct strbuf *, size_t *); +void strbuf_init(struct strbuf *buf, ssize_t hint); +void strbuf_release(struct strbuf *buf); +char *strbuf_detach(struct strbuf *buf, size_t *); /*----- strbuf size 
related -----*/ static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? sb->alloc - sb->len - 1 : 0; } -extern void strbuf_grow(struct strbuf *, size_t); +void strbuf_grow(struct strbuf *buf, size_t); static inline void strbuf_setlen(struct strbuf *sb, size_t len) { if (!sb->alloc) @@ -71,24 +71,17 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) { } /*----- add data in your buffer -----*/ -static inline void strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); - sb->buf[sb->len++] = c; - sb->buf[sb->len] = '\0'; -} - -extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); +void strbuf_addch(struct strbuf *sb, int c); -extern void strbuf_add(struct strbuf *, const void *, size_t); +void strbuf_add(struct strbuf *buf, const void *, size_t); static inline void strbuf_addstr(struct strbuf *sb, const char *s) { strbuf_add(sb, s, strlen(s)); } __attribute__((format(printf,2,3))) -extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); -extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap); +void strbuf_addf(struct strbuf *sb, const char *fmt, ...); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); #endif /* __PERF_STRBUF_H */ diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index 9292a5291445..946fdf2db97c 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -3,32 +3,31 @@ #include <linux/types.h> -extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); -extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, 
int err, int merges); -extern void svg_box(int Yslot, u64 start, u64 end, const char *type); -extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); - - -extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); -extern void svg_cstate(int cpu, u64 start, u64 end, int type); -extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq); - - -extern void svg_time_grid(double min_thickness); -extern void svg_io_legenda(void); -extern void svg_legenda(void); -extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); -extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); -extern void svg_interrupt(u64 start, int row, const char *backtrace); -extern void svg_text(int Yslot, u64 start, const char *text); -extern void svg_close(void); -extern int svg_build_topology_map(char *sib_core, int sib_core_nr, - char *sib_thr, int sib_thr_nr); +void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); +void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_box(int Yslot, u64 start, u64 end, const char *type); +void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_cpu_box(int cpu, 
u64 max_frequency, u64 turbo_frequency); + + +void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); +void svg_cstate(int cpu, u64 start, u64 end, int type); +void svg_pstate(int cpu, u64 start, u64 end, u64 freq); + + +void svg_time_grid(double min_thickness); +void svg_io_legenda(void); +void svg_legenda(void); +void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); +void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); +void svg_interrupt(u64 start, int row, const char *backtrace); +void svg_text(int Yslot, u64 start, const char *text); +void svg_close(void); +int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr); extern int svg_page_width; extern u64 svg_highlight; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 562b8ebeae5b..bc229a74c6a9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include <inttypes.h> #include "symbol.h" +#include "demangle-java.h" #include "machine.h" #include "vdso.h" #include <symbol/kallsyms.h> @@ -792,6 +793,7 @@ int dso__load_sym(struct dso *dso, struct map *map, uint32_t idx; GElf_Ehdr ehdr; GElf_Shdr shdr; + GElf_Shdr tshdr; Elf_Data *syms, *opddata = NULL; GElf_Sym sym; Elf_Scn *sec, *sec_strndx; @@ -831,6 +833,9 @@ int dso__load_sym(struct dso *dso, struct map *map, sec = syms_ss->symtab; shdr = syms_ss->symshdr; + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; + if (runtime_ss->opdsec) opddata = elf_rawdata(runtime_ss->opdsec, NULL); @@ -879,12 +884,8 @@ int dso__load_sym(struct dso *dso, struct map *map, * Handle any relocation of vdso necessary because older kernels * attempted to prelink vdso to its virtual address. 
*/ - if (dso__is_vdso(dso)) { - GElf_Shdr tshdr; - - if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) - map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; - } + if (dso__is_vdso(dso)) + map->reloc = map->start - dso->text_offset; dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* @@ -1077,6 +1078,8 @@ new_symbol: demangle_flags = DMGL_PARAMS | DMGL_ANSI; demangled = bfd_demangle(NULL, elf_name, demangle_flags); + if (demangled == NULL) + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); if (demangled != NULL) elf_name = demangled; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ab02209a7cf3..e7588dc91518 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1466,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) + if (is_regular_file(name) && + filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); /* @@ -1487,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir, name, PATH_MAX)) continue; + if (!is_regular_file(name)) + continue; + /* Name is now the name of the next image to try */ if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; @@ -1525,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; + if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso__build_id_is_kmod(dso, name, PATH_MAX)) + kmod = true; + if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); else diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ccd1caa40e11..c8b7544d9267 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -34,8 
+34,8 @@ #endif #ifdef HAVE_LIBELF_SUPPORT -extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, size_t *idx); +Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, size_t *idx); #endif #ifndef DMGL_PARAMS @@ -110,7 +110,8 @@ struct symbol_conf { has_filter, show_ref_callgraph, hide_unresolved, - raw_trace; + raw_trace, + report_hierarchy; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 802bb868d446..8ae051e0ec79 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -10,6 +10,7 @@ #include <linux/err.h> #include <traceevent/event-parse.h> #include <api/fs/tracing_path.h> +#include <api/fs/fs.h> #include "trace-event.h" #include "machine.h" #include "util.h" diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 4d4210d4e13d..1b741646eed0 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -19,7 +19,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) u64 quot, rem; quot = cyc >> tc->time_shift; - rem = cyc & ((1 << tc->time_shift) - 1); + rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + ((rem * tc->time_mult) >> tc->time_shift); } diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 6adfa18cdd4e..996046a66fe5 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -41,15 +41,9 @@ static void warn_builtin(const char *warn, va_list params) /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. 
*/ static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; static void (*error_routine)(const char *err, va_list params) = error_builtin; static void (*warn_routine)(const char *err, va_list params) = warn_builtin; -void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) -{ - die_routine = routine; -} - void set_warning_routine(void (*routine)(const char *err, va_list params)) { warn_routine = routine; @@ -65,7 +59,7 @@ void die(const char *err, ...) va_list params; va_start(params, err); - die_routine(err, params); + die_builtin(err, params); va_end(params); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ead9509835d2..b7766c577b01 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -14,6 +14,7 @@ #include <limits.h> #include <byteswap.h> #include <linux/kernel.h> +#include <linux/log2.h> #include <unistd.h> #include "callchain.h" #include "strlist.h" @@ -507,54 +508,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -int filename__read_str(const char *filename, char **buf, size_t *sizep) -{ - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; - - fd = open(filename, O_RDONLY); - if (fd < 0) - return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warning("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; - } else - free(bf); - - close(fd); - return err; -} - const char *get_filename_for_perf_kvm(void) { const char *filename; @@ -691,3 +644,66 @@ out: return 
tip; } + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra) +{ + size_t i, j, mask; + + if (!printer) + return; + + bytes_per_line = roundup_pow_of_two(bytes_per_line); + mask = bytes_per_line - 1; + + printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + for (i = 0; i < len; i++) { + if ((i & mask) == 0) { + printer(BINARY_PRINT_LINE_BEGIN, -1, extra); + printer(BINARY_PRINT_ADDR, i, extra); + } + + printer(BINARY_PRINT_NUM_DATA, data[i], extra); + + if (((i & mask) == mask) || i == len - 1) { + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_NUM_PAD, -1, extra); + + printer(BINARY_PRINT_SEP, i, extra); + for (j = i & ~mask; j <= i; j++) + printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_CHAR_PAD, i, extra); + printer(BINARY_PRINT_LINE_END, -1, extra); + } + } + printer(BINARY_PRINT_DATA_END, -1, extra); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fe915e616f9b..8298d607c738 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -82,6 +82,8 @@ extern const char *graph_line; extern const char *graph_dotted_line; +extern const char *spaces; +extern const char *dots; extern char buildid_dir[]; /* On most systems <limits.h> would have given us this, but @@ -131,25 +133,15 @@ extern char buildid_dir[]; #define PERF_GTK_DSO "libperf-gtk.so" /* General helper functions */ -extern void usage(const char *err) NORETURN; -extern void 
die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +void usage(const char *err) NORETURN; +void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); -#include "../../../include/linux/stringify.h" +void set_warning_routine(void (*routine)(const char *err, va_list params)); -#define DIE_IF(cnd) \ - do { if (cnd) \ - die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ - __stringify(cnd) "\n"); \ - } while (0) - - -extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); -extern void set_warning_routine(void (*routine)(const char *err, va_list params)); - -extern int prefixcmp(const char *str, const char *prefix); -extern void set_buildid_dir(const char *dir); +int prefixcmp(const char *str, const char *prefix); +void set_buildid_dir(const char *dir); #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 1) @@ -170,8 +162,7 @@ static inline char *gitstrchrnul(const char *s, int c) /* * Wrappers: */ -extern char *xstrdup(const char *str); -extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); +void *xrealloc(void *ptr, size_t size) __attribute__((weak)); static inline void *zalloc(size_t size) @@ -303,7 +294,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); -int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); @@ -343,5 +333,27 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); 
+bool is_regular_file(const char *file); +int fetch_current_timestamp(char *buf, size_t sz); + +enum binary_printer_ops { + BINARY_PRINT_DATA_BEGIN, + BINARY_PRINT_LINE_BEGIN, + BINARY_PRINT_ADDR, + BINARY_PRINT_NUM_DATA, + BINARY_PRINT_NUM_PAD, + BINARY_PRINT_SEP, + BINARY_PRINT_CHAR_DATA, + BINARY_PRINT_CHAR_PAD, + BINARY_PRINT_LINE_END, + BINARY_PRINT_DATA_END, +}; + +typedef void (*print_binary_t)(enum binary_printer_ops, + unsigned int val, + void *extra); +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 19f15b650703..5f1a07c4b87b 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -12,18 +12,6 @@ static inline void release_pack_memory(size_t size __maybe_unused, { } -char *xstrdup(const char *str) -{ - char *ret = strdup(str); - if (!ret) { - release_pack_memory(strlen(str) + 1, -1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } - return ret; -} - void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d2ae87d148ed..acbf7ff2ee6e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2321,7 +2321,7 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void -dump_cstate_pstate_config_info(int family, int model) +dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) return; @@ -2584,7 +2584,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(int model) +double get_tdp(unsigned int model) { unsigned long long msr; @@ -2704,7 +2704,7 @@ void 
rapl_probe(unsigned int family, unsigned int model) return; } -void perf_limit_reasons_probe(int family, int model) +void perf_limit_reasons_probe(unsigned int family, unsigned int model) { if (!genuine_intel) return; diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak index c16ce833079c..c16ce833079c 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/scripts/utilities.mak diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b3281dcd4a5d..3187322eeed7 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -151,6 +151,11 @@ struct nfit_test { int (*alloc)(struct nfit_test *t); void (*setup)(struct nfit_test *t); int setup_hotplug; + struct ars_state { + struct nd_cmd_ars_status *ars_status; + unsigned long deadline; + spinlock_t lock; + } ars_state; }; static struct nfit_test *to_nfit_test(struct device *dev) @@ -218,6 +223,7 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, } #define NFIT_TEST_ARS_RECORDS 4 +#define NFIT_TEST_CLEAR_ERR_UNIT 256 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, unsigned int buf_len) @@ -228,44 +234,113 @@ static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record); nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; return 0; } -static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd, - unsigned int buf_len) +/* + * Initialize the ars_state to return an ars_result 1 second in the future with + * a 4K error range in the middle of the requested address range. 
+ */ +static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len) { - if (buf_len < sizeof(*nd_cmd)) + struct nd_cmd_ars_status *ars_status; + struct nd_ars_record *ars_record; + + ars_state->deadline = jiffies + 1*HZ; + ars_status = ars_state->ars_status; + ars_status->status = 0; + ars_status->out_length = sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record); + ars_status->address = addr; + ars_status->length = len; + ars_status->type = ND_ARS_PERSISTENT; + ars_status->num_records = 1; + ars_record = &ars_status->records[0]; + ars_record->handle = 0; + ars_record->err_address = addr + len / 2; + ars_record->length = SZ_4K; +} + +static int nfit_test_cmd_ars_start(struct ars_state *ars_state, + struct nd_cmd_ars_start *ars_start, unsigned int buf_len, + int *cmd_rc) +{ + if (buf_len < sizeof(*ars_start)) return -EINVAL; - nd_cmd->status = 0; + spin_lock(&ars_state->lock); + if (time_before(jiffies, ars_state->deadline)) { + ars_start->status = NFIT_ARS_START_BUSY; + *cmd_rc = -EBUSY; + } else { + ars_start->status = 0; + ars_start->scrub_time = 1; + post_ars_status(ars_state, ars_start->address, + ars_start->length); + *cmd_rc = 0; + } + spin_unlock(&ars_state->lock); return 0; } -static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, - unsigned int buf_len) +static int nfit_test_cmd_ars_status(struct ars_state *ars_state, + struct nd_cmd_ars_status *ars_status, unsigned int buf_len, + int *cmd_rc) { - if (buf_len < sizeof(*nd_cmd)) + if (buf_len < ars_state->ars_status->out_length) return -EINVAL; - nd_cmd->out_length = sizeof(struct nd_cmd_ars_status); - /* TODO: emit error records */ - nd_cmd->num_records = 0; - nd_cmd->address = 0; - nd_cmd->length = -1ULL; - nd_cmd->status = 0; + spin_lock(&ars_state->lock); + if (time_before(jiffies, ars_state->deadline)) { + memset(ars_status, 0, buf_len); + ars_status->status = NFIT_ARS_STATUS_BUSY; + ars_status->out_length = sizeof(*ars_status); + *cmd_rc = -EBUSY; + } else { + 
memcpy(ars_status, ars_state->ars_status, + ars_state->ars_status->out_length); + *cmd_rc = 0; + } + spin_unlock(&ars_state->lock); + return 0; +} +static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, + unsigned int buf_len, int *cmd_rc) +{ + const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; + if (buf_len < sizeof(*clear_err)) + return -EINVAL; + + if ((clear_err->address & mask) || (clear_err->length & mask)) + return -EINVAL; + + /* + * Report 'all clear' success for all commands even though a new + * scrub will find errors again. This is enough to have the + * error removed from the 'badblocks' tracking in the pmem + * driver. + */ + clear_err->status = 0; + clear_err->cleared = clear_err->length; + *cmd_rc = 0; return 0; } static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); - int i, rc = 0; + int i, rc = 0, __cmd_rc; + + if (!cmd_rc) + cmd_rc = &__cmd_rc; + *cmd_rc = 0; if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); @@ -297,6 +372,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, return -ENOTTY; } } else { + struct ars_state *ars_state = &t->ars_state; + if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) return -ENOTTY; @@ -305,10 +382,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_ars_cap(buf, buf_len); break; case ND_CMD_ARS_START: - rc = nfit_test_cmd_ars_start(buf, buf_len); + rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len, + cmd_rc); break; case ND_CMD_ARS_STATUS: - rc = nfit_test_cmd_ars_status(buf, buf_len); + rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, + cmd_rc); + break; + case ND_CMD_CLEAR_ERROR: + rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc); break; default: return 
-ENOTTY; @@ -424,11 +506,25 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) return NULL; } +static int ars_state_init(struct device *dev, struct ars_state *ars_state) +{ + ars_state->ars_status = devm_kzalloc(dev, + sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS, + GFP_KERNEL); + if (!ars_state->ars_status) + return -ENOMEM; + spin_lock_init(&ars_state->lock); + return 0; +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR + + offsetof(struct acpi_nfit_control_region, + window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; @@ -471,14 +567,14 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } - return 0; + return ars_state_init(&t->pdev.dev, &t->ars_state); } static int nfit_test1_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) + sizeof(struct acpi_nfit_memory_map) - + sizeof(struct acpi_nfit_control_region); + + offsetof(struct acpi_nfit_control_region, window_size); t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); if (!t->nfit_buf) @@ -489,12 +585,11 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[0]) return -ENOMEM; - return 0; + return ars_state_init(&t->pdev.dev, &t->ars_state); } static void nfit_test0_setup(struct nfit_test *t) { - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; struct acpi_nfit_memory_map *memdev; void *nfit_buf = t->nfit_buf; @@ -611,7 +706,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 0; memdev->region_id = 0; memdev->range_index = 0+1; - memdev->region_index = 0+1; + memdev->region_index = 4+1; memdev->region_size = SPA0_SIZE/2; memdev->region_offset = t->spa_set_dma[0]; memdev->address 
= 0; @@ -626,7 +721,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 1; memdev->region_id = 0; memdev->range_index = 0+1; - memdev->region_index = 1+1; + memdev->region_index = 5+1; memdev->region_size = SPA0_SIZE/2; memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; memdev->address = 0; @@ -641,7 +736,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 0; memdev->region_id = 1; memdev->range_index = 1+1; - memdev->region_index = 0+1; + memdev->region_index = 4+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1]; memdev->address = SPA0_SIZE/2; @@ -656,7 +751,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 1; memdev->region_id = 1; memdev->range_index = 1+1; - memdev->region_index = 1+1; + memdev->region_index = 5+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -671,7 +766,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 2; memdev->region_id = 0; memdev->range_index = 1+1; - memdev->region_index = 2+1; + memdev->region_index = 6+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -686,7 +781,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 3; memdev->region_id = 0; memdev->range_index = 1+1; - memdev->region_index = 3+1; + memdev->region_index = 7+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -814,7 +909,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_ways = 1; offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; - /* dcr-descriptor0 */ + /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -823,6 +918,7 @@ 
static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[0]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -830,7 +926,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor1 */ + /* dcr-descriptor1: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -839,6 +935,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[1]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -846,7 +943,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor2 */ + /* dcr-descriptor2: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -855,6 +952,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[2]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -862,7 +960,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor3 */ + /* dcr-descriptor3: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -871,6 +969,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[3]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; 
dcr->command_offset = 0; @@ -879,6 +978,63 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_size = 4; offset = offset + sizeof(struct acpi_nfit_control_region) * 4; + /* dcr-descriptor0: pmem */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 4+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[0]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor1: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size); + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 5+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[1]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor2: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size) * 2; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 6+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[2]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor3: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size) * 3; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 7+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[3]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + offset = offset + offsetof(struct acpi_nfit_control_region, + window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf 
+ offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; @@ -958,15 +1114,16 @@ static void nfit_test0_setup(struct nfit_test *t) if (t->setup_hotplug) { offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; - /* dcr-descriptor4 */ + /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); - dcr->region_index = 4+1; + dcr->region_index = 8+1; dcr->vendor_id = 0xabcd; dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[4]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -975,11 +1132,26 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_size = 4; offset = offset + sizeof(struct acpi_nfit_control_region); + /* dcr-descriptor4: pmem */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 9+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[4]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + offset = offset + offsetof(struct acpi_nfit_control_region, + window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; bdw->header.length = sizeof(struct acpi_nfit_data_region); - bdw->region_index = 4+1; + bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; @@ -1027,7 +1199,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 10+1; - memdev->region_index = 4+1; + memdev->region_index = 8+1; memdev->region_size = 0; memdev->region_offset = 0; memdev->address = 0; @@ -1043,14 +1215,14 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 11+1; - memdev->region_index = 
4+1; + memdev->region_index = 9+1; memdev->region_size = SPA0_SIZE; memdev->region_offset = t->spa_set_dma[2]; memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; - /* mem-region16 (spa/dcr4, dimm4) */ + /* mem-region16 (spa/bdw4, dimm4) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1059,7 +1231,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 12+1; - memdev->region_index = 4+1; + memdev->region_index = 8+1; memdev->region_size = 0; memdev->region_offset = 0; memdev->address = 0; @@ -1076,6 +1248,8 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_address[0] = t->flush_dma[4]; } + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -1083,8 +1257,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1094,7 +1267,6 @@ static void nfit_test1_setup(struct nfit_test *t) struct acpi_nfit_memory_map *memdev; struct acpi_nfit_control_region *dcr; struct acpi_nfit_system_address *spa; - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; offset = 0; @@ -1130,26 +1302,23 @@ static void nfit_test1_setup(struct nfit_test *t) /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = offsetof(struct acpi_nfit_control_region, 
+ window_size); dcr->region_index = 0+1; dcr->vendor_id = 0xabcd; dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~0; - dcr->code = 0x201; + dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - dcr->window_size = 0; - dcr->command_offset = 0; - dcr->command_size = 0; - dcr->status_offset = 0; - dcr->status_size = 0; + + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -1232,26 +1401,16 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->setup(nfit_test); acpi_desc = &nfit_test->acpi_desc; - acpi_desc->dev = &pdev->dev; + acpi_nfit_desc_init(acpi_desc, &pdev->dev); acpi_desc->nfit = nfit_test->nfit_buf; acpi_desc->blk_do_io = nfit_test_blk_do_io; nd_desc = &acpi_desc->nd_desc; - nd_desc->attr_groups = acpi_nfit_attribute_groups; + nd_desc->provider_name = NULL; + nd_desc->ndctl = nfit_test_ctl; acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); if (!acpi_desc->nvdimm_bus) return -ENXIO; - INIT_LIST_HEAD(&acpi_desc->spa_maps); - INIT_LIST_HEAD(&acpi_desc->spas); - INIT_LIST_HEAD(&acpi_desc->dcrs); - INIT_LIST_HEAD(&acpi_desc->bdws); - INIT_LIST_HEAD(&acpi_desc->idts); - INIT_LIST_HEAD(&acpi_desc->flushes); - INIT_LIST_HEAD(&acpi_desc->memdevs); - INIT_LIST_HEAD(&acpi_desc->dimms); - mutex_init(&acpi_desc->spa_map_mutex); - mutex_init(&acpi_desc->init_mutex); - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); if (rc) { nvdimm_bus_unregister(acpi_desc->nvdimm_bus); diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore new file mode 100644 index 000000000000..11d888ca6a92 --- 
/dev/null +++ b/tools/testing/radix-tree/.gitignore @@ -0,0 +1,2 @@ +main +radix-tree.c diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile new file mode 100644 index 000000000000..604212db9d4b --- /dev/null +++ b/tools/testing/radix-tree/Makefile @@ -0,0 +1,19 @@ + +CFLAGS += -I. -g -Wall -D_LGPL_SOURCE +LDFLAGS += -lpthread -lurcu +TARGETS = main +OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ + regression1.o regression2.o regression3.o + +targets: $(TARGETS) + +main: $(OFILES) + $(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main + +clean: + $(RM) -f $(TARGETS) *.o radix-tree.c + +$(OFILES): *.h */*.h + +radix-tree.c: ../../../lib/radix-tree.c + sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ diff --git a/tools/testing/radix-tree/find_next_bit.c b/tools/testing/radix-tree/find_next_bit.c new file mode 100644 index 000000000000..d1c2178bb2d4 --- /dev/null +++ b/tools/testing/radix-tree/find_next_bit.c @@ -0,0 +1,57 @@ +/* find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/bitops.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/* + * Find the next set bit in a memory region. 
+ */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c new file mode 100644 index 000000000000..154823737b20 --- /dev/null +++ b/tools/testing/radix-tree/linux.c @@ -0,0 +1,60 @@ +#include <stdlib.h> +#include <string.h> +#include <malloc.h> +#include <unistd.h> +#include <assert.h> + +#include <linux/mempool.h> +#include <linux/slab.h> +#include <urcu/uatomic.h> + +int nr_allocated; + +void *mempool_alloc(mempool_t *pool, int gfp_mask) +{ + return pool->alloc(gfp_mask, pool->data); +} + +void mempool_free(void *element, mempool_t *pool) +{ + pool->free(element, pool->data); +} + +mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data) +{ + mempool_t *ret = malloc(sizeof(*ret)); + + ret->alloc = alloc_fn; + ret->free = free_fn; + ret->data = pool_data; + return ret; +} + +void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) +{ + void *ret = malloc(cachep->size); + if (cachep->ctor) + cachep->ctor(ret); + uatomic_inc(&nr_allocated); + return ret; +} + +void kmem_cache_free(struct kmem_cache *cachep, void *objp) +{ + 
assert(objp); + uatomic_dec(&nr_allocated); + memset(objp, 0, cachep->size); + free(objp); +} + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void *)) +{ + struct kmem_cache *ret = malloc(sizeof(*ret)); + + ret->size = size; + ret->ctor = ctor; + return ret; +} diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h new file mode 100644 index 000000000000..71d58427ab60 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops.h @@ -0,0 +1,150 @@ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#include <linux/types.h> + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. 
+ */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! 
*/ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h new file mode 100644 index 000000000000..9a3274aecf83 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/__ffs.h @@ -0,0 +1,43 @@ +#ifndef _ASM_GENERIC_BITOPS___FFS_H_ +#define _ASM_GENERIC_BITOPS___FFS_H_ + +#include <asm/types.h> + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. 
+ */ +static inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h new file mode 100644 index 000000000000..fbbb43af7dc0 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/ffs.h @@ -0,0 +1,41 @@ +#ifndef _ASM_GENERIC_BITOPS_FFS_H_ +#define _ASM_GENERIC_BITOPS_FFS_H_ + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static inline int ffs(int x) +{ + int r = 1; + + if (!x) + return 0; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h new file mode 100644 index 000000000000..6744bd4cdf46 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/ffz.h @@ -0,0 +1,12 @@ +#ifndef _ASM_GENERIC_BITOPS_FFZ_H_ +#define _ASM_GENERIC_BITOPS_FFZ_H_ + +/* + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. 
+ */ +#define ffz(x) __ffs(~(x)) + +#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h new file mode 100644 index 000000000000..72a51e5a12ef --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/find.h @@ -0,0 +1,13 @@ +#ifndef _ASM_GENERIC_BITOPS_FIND_H_ +#define _ASM_GENERIC_BITOPS_FIND_H_ + +extern unsigned long find_next_bit(const unsigned long *addr, unsigned long + size, unsigned long offset); + +extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) + +#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h new file mode 100644 index 000000000000..850859bc5069 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/fls.h @@ -0,0 +1,41 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS_H_ +#define _ASM_GENERIC_BITOPS_FLS_H_ + +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 
+ */ + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h new file mode 100644 index 000000000000..1b6b17ce2428 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/fls64.h @@ -0,0 +1,14 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS64_H_ +#define _ASM_GENERIC_BITOPS_FLS64_H_ + +#include <asm/types.h> + +static inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} + +#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h new file mode 100644 index 000000000000..fbbc383771da --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/hweight.h @@ -0,0 +1,11 @@ +#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_HWEIGHT_H_ + +#include <asm/types.h> + +extern unsigned int hweight32(unsigned int w); +extern unsigned int hweight16(unsigned int w); +extern unsigned int hweight8(unsigned int w); +extern unsigned long hweight64(__u64 w); + +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h new file mode 100644 index 000000000000..b9c7e5d2d2ad --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/le.h @@ -0,0 +1,53 @@ +#ifndef _ASM_GENERIC_BITOPS_LE_H_ +#define _ASM_GENERIC_BITOPS_LE_H_ + +#include <asm/types.h> +#include <asm/byteorder.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) + +#if defined(__LITTLE_ENDIAN) + +#define 
generic_test_le_bit(nr, addr) test_bit(nr, addr) +#define generic___set_le_bit(nr, addr) __set_bit(nr, addr) +#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr) + +#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr) +#define generic_test_and_clear_le_bit(nr, addr) test_and_clear_bit(nr, addr) + +#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr) +#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) + +#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) + +#elif defined(__BIG_ENDIAN) + +#define generic_test_le_bit(nr, addr) \ + test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___set_le_bit(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___clear_le_bit(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define generic_test_and_set_le_bit(nr, addr) \ + test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic_test_and_clear_le_bit(nr, addr) \ + test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define generic___test_and_set_le_bit(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___test_and_clear_le_bit(nr, addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); + +#else +#error "Please fix <asm/byteorder.h>" +#endif + +#define generic_find_first_zero_le_bit(addr, size) \ + generic_find_next_zero_le_bit((addr), (size), 0) + +#endif /* _ASM_GENERIC_BITOPS_LE_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h new file mode 100644 index 000000000000..46a825cf2ae1 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/non-atomic.h @@ -0,0 +1,111 @@ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#include 
<asm/types.h> + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. 
+ */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h new file mode 100644 index 000000000000..ccbe444977df --- /dev/null +++ b/tools/testing/radix-tree/linux/bug.h @@ -0,0 +1 @@ +#define WARN_ON_ONCE(x) assert(x) diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h new file mode 100644 index 000000000000..60a40459f269 --- /dev/null +++ b/tools/testing/radix-tree/linux/cpu.h @@ -0,0 +1,34 @@ + +#define hotcpu_notifier(a, b) + 
+#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ +#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ +#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ +#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ +#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ +#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ +#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ +#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug + * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ +#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached + * idle loop. */ +#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, + * perhaps due to preemption. 
*/ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) +#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h new file mode 100644 index 000000000000..b6afd131998d --- /dev/null +++ b/tools/testing/radix-tree/linux/export.h @@ -0,0 +1,2 @@ + +#define EXPORT_SYMBOL(sym) diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h new file mode 100644 index 000000000000..0e37f7a760eb --- /dev/null +++ b/tools/testing/radix-tree/linux/gfp.h @@ -0,0 +1,10 @@ +#ifndef _GFP_H +#define _GFP_H + +#define __GFP_BITS_SHIFT 22 +#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) +#define __GFP_WAIT 1 +#define __GFP_ACCOUNT 0 +#define __GFP_NOWARN 0 + +#endif diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h new file mode 100644 index 000000000000..ae013b0160ac --- /dev/null +++ b/tools/testing/radix-tree/linux/kernel.h @@ -0,0 +1,35 @@ +#ifndef _KERNEL_H +#define _KERNEL_H + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stddef.h> +#include <limits.h> + +#ifndef NULL +#define NULL 0 +#endif + +#define BUG_ON(expr) assert(!(expr)) +#define __init +#define __must_check +#define panic(expr) +#define printk printf +#define __force +#define likely(c) (c) +#define unlikely(c) (c) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define container_of(ptr, type, member) ({ \ + const typeof( 
((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type, member) );}) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline int in_interrupt(void) +{ + return 0; +} +#endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h new file mode 100644 index 000000000000..155f112786c4 --- /dev/null +++ b/tools/testing/radix-tree/linux/kmemleak.h @@ -0,0 +1 @@ +static inline void kmemleak_update_trace(const void *ptr) { } diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h new file mode 100644 index 000000000000..6a2dc55b41d6 --- /dev/null +++ b/tools/testing/radix-tree/linux/mempool.h @@ -0,0 +1,16 @@ + +#include <linux/slab.h> + +typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data); +typedef void (mempool_free_t)(void *element, void *pool_data); + +typedef struct { + mempool_alloc_t *alloc; + mempool_free_t *free; + void *data; +} mempool_t; + +void *mempool_alloc(mempool_t *pool, int gfp_mask); +void mempool_free(void *element, mempool_t *pool); +mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); diff --git a/tools/testing/radix-tree/linux/notifier.h b/tools/testing/radix-tree/linux/notifier.h new file mode 100644 index 000000000000..70e4797d5a46 --- /dev/null +++ b/tools/testing/radix-tree/linux/notifier.h @@ -0,0 +1,8 @@ +#ifndef _NOTIFIER_H +#define _NOTIFIER_H + +struct notifier_block; + +#define NOTIFY_OK 0x0001 /* Suits me */ + +#endif diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h new file mode 100644 index 000000000000..5837f1d56f17 --- /dev/null +++ b/tools/testing/radix-tree/linux/percpu.h @@ -0,0 +1,7 @@ + +#define DEFINE_PER_CPU(type, val) type val + +#define __get_cpu_var(var) var +#define this_cpu_ptr(var) var +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu(var, cpu) 
(*per_cpu_ptr(&(var), cpu)) diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h new file mode 100644 index 000000000000..6210672e3baa --- /dev/null +++ b/tools/testing/radix-tree/linux/preempt.h @@ -0,0 +1,4 @@ +/* */ + +#define preempt_disable() do { } while (0) +#define preempt_enable() do { } while (0) diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h new file mode 100644 index 000000000000..ce694ddd4aea --- /dev/null +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -0,0 +1 @@ +#include "../../../../include/linux/radix-tree.h" diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h new file mode 100644 index 000000000000..f7129ea2a899 --- /dev/null +++ b/tools/testing/radix-tree/linux/rcupdate.h @@ -0,0 +1,9 @@ +#ifndef _RCUPDATE_H +#define _RCUPDATE_H + +#include <urcu.h> + +#define rcu_dereference_raw(p) rcu_dereference(p) +#define rcu_dereference_protected(p, cond) rcu_dereference(p) + +#endif diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h new file mode 100644 index 000000000000..57282506c21d --- /dev/null +++ b/tools/testing/radix-tree/linux/slab.h @@ -0,0 +1,28 @@ +#ifndef SLAB_H +#define SLAB_H + +#include <linux/types.h> + +#define GFP_KERNEL 1 +#define SLAB_HWCACHE_ALIGN 1 +#define SLAB_PANIC 2 +#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ + +static inline int gfpflags_allow_blocking(gfp_t mask) +{ + return 1; +} + +struct kmem_cache { + int size; + void (*ctor)(void *); +}; + +void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); +void kmem_cache_free(struct kmem_cache *cachep, void *objp); + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void *)); + +#endif /* SLAB_H */ diff --git a/tools/testing/radix-tree/linux/types.h 
b/tools/testing/radix-tree/linux/types.h new file mode 100644 index 000000000000..72a9d85f6c76 --- /dev/null +++ b/tools/testing/radix-tree/linux/types.h @@ -0,0 +1,28 @@ +#ifndef _TYPES_H +#define _TYPES_H + +#define __rcu +#define __read_mostly + +#define BITS_PER_LONG (sizeof(long) * 8) + +struct list_head { + struct list_head *next, *prev; +}; + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +typedef struct { + unsigned int x; +} spinlock_t; + +#define uninitialized_var(x) x = x + +typedef unsigned gfp_t; +#include <linux/gfp.h> + +#endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c new file mode 100644 index 000000000000..0e83cad27a9f --- /dev/null +++ b/tools/testing/radix-tree/main.c @@ -0,0 +1,272 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <assert.h> + +#include <linux/slab.h> +#include <linux/radix-tree.h> + +#include "test.h" +#include "regression.h" + +void __gang_check(unsigned long middle, long down, long up, int chunk, int hop) +{ + long idx; + RADIX_TREE(tree, GFP_KERNEL); + + middle = 1 << 30; + + for (idx = -down; idx < up; idx++) + item_insert(&tree, middle + idx); + + item_check_absent(&tree, middle - down - 1); + for (idx = -down; idx < up; idx++) + item_check_present(&tree, middle + idx); + item_check_absent(&tree, middle + up); + + item_gang_check_present(&tree, middle - down, + up + down, chunk, hop); + item_full_scan(&tree, middle - down, down + up, chunk); + item_kill_tree(&tree); +} + +void gang_check(void) +{ + __gang_check(1 << 30, 128, 128, 35, 2); + __gang_check(1 << 31, 128, 128, 32, 32); + __gang_check(1 << 31, 128, 128, 32, 100); + __gang_check(1 << 31, 128, 128, 17, 7); + __gang_check(0xffff0000, 0, 65536, 17, 7); + __gang_check(0xfffffffe, 1, 1, 17, 7); +} + +void __big_gang_check(void) +{ + unsigned long start; + int wrapped = 0; + + start = 0; + do { + unsigned long old_start; + +// 
printf("0x%08lx\n", start); + __gang_check(start, rand() % 113 + 1, rand() % 71, + rand() % 157, rand() % 91 + 1); + old_start = start; + start += rand() % 1000000; + start %= 1ULL << 33; + if (start < old_start) + wrapped = 1; + } while (!wrapped); +} + +void big_gang_check(void) +{ + int i; + + for (i = 0; i < 1000; i++) { + __big_gang_check(); + srand(time(0)); + printf("%d ", i); + fflush(stdout); + } +} + +void add_and_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 44); + item_check_present(&tree, 44); + item_check_absent(&tree, 43); + item_kill_tree(&tree); +} + +void dynamic_height_check(void) +{ + int i; + RADIX_TREE(tree, GFP_KERNEL); + tree_verify_min_height(&tree, 0); + + item_insert(&tree, 42); + tree_verify_min_height(&tree, 42); + + item_insert(&tree, 1000000); + tree_verify_min_height(&tree, 1000000); + + assert(item_delete(&tree, 1000000)); + tree_verify_min_height(&tree, 42); + + assert(item_delete(&tree, 42)); + tree_verify_min_height(&tree, 0); + + for (i = 0; i < 1000; i++) { + item_insert(&tree, i); + tree_verify_min_height(&tree, i); + } + + i--; + for (;;) { + assert(item_delete(&tree, i)); + if (i == 0) { + tree_verify_min_height(&tree, 0); + break; + } + i--; + tree_verify_min_height(&tree, i); + } + + item_kill_tree(&tree); +} + +void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsigned long end, unsigned long *idx, int count, int fromtag, int totag) +{ + int i; + + for (i = 0; i < count; i++) { +/* if (i % 1000 == 0) + putchar('.'); */ + if (idx[i] < start || idx[i] > end) { + if (item_tag_get(tree, idx[i], totag)) { + printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag)); + } + assert(!item_tag_get(tree, idx[i], totag)); + continue; + } + if (item_tag_get(tree, idx[i], fromtag) ^ + item_tag_get(tree, idx[i], totag)) { + printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), 
item_tag_get(tree, idx[i], totag)); + } + assert(!(item_tag_get(tree, idx[i], fromtag) ^ + item_tag_get(tree, idx[i], totag))); + } +} + +#define ITEMS 50000 + +void copy_tag_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + unsigned long idx[ITEMS]; + unsigned long start, end, count = 0, tagged, cur, tmp; + int i; + +// printf("generating radix tree indices...\n"); + start = rand(); + end = rand(); + if (start > end && (rand() % 10)) { + cur = start; + start = end; + end = cur; + } + /* Specifically create items around the start and the end of the range + * with high probability to check for off by one errors */ + cur = rand(); + if (cur & 1) { + item_insert(&tree, start); + if (cur & 2) { + if (start <= end) + count++; + item_tag_set(&tree, start, 0); + } + } + if (cur & 4) { + item_insert(&tree, start-1); + if (cur & 8) + item_tag_set(&tree, start-1, 0); + } + if (cur & 16) { + item_insert(&tree, end); + if (cur & 32) { + if (start <= end) + count++; + item_tag_set(&tree, end, 0); + } + } + if (cur & 64) { + item_insert(&tree, end+1); + if (cur & 128) + item_tag_set(&tree, end+1, 0); + } + + for (i = 0; i < ITEMS; i++) { + do { + idx[i] = rand(); + } while (item_lookup(&tree, idx[i])); + + item_insert(&tree, idx[i]); + if (rand() & 1) { + item_tag_set(&tree, idx[i], 0); + if (idx[i] >= start && idx[i] <= end) + count++; + } +/* if (i % 1000 == 0) + putchar('.'); */ + } + +// printf("\ncopying tags...\n"); + cur = start; + tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1); + +// printf("checking copied tags\n"); + assert(tagged == count); + check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1); + + /* Copy tags in several rounds */ +// printf("\ncopying tags...\n"); + cur = start; + do { + tmp = rand() % (count/10+2); + tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2); + } while (tmp == tagged); + +// printf("%lu %lu %lu\n", tagged, tmp, count); +// printf("checking copied tags\n"); + check_copied_tags(&tree, start, end, 
idx, ITEMS, 0, 2); + assert(tagged < tmp); + verify_tag_consistency(&tree, 0); + verify_tag_consistency(&tree, 1); + verify_tag_consistency(&tree, 2); +// printf("\n"); + item_kill_tree(&tree); +} + +static void single_thread_tests(void) +{ + int i; + + tag_check(); + printf("after tag_check: %d allocated\n", nr_allocated); + gang_check(); + printf("after gang_check: %d allocated\n", nr_allocated); + add_and_check(); + printf("after add_and_check: %d allocated\n", nr_allocated); + dynamic_height_check(); + printf("after dynamic_height_check: %d allocated\n", nr_allocated); + big_gang_check(); + printf("after big_gang_check: %d allocated\n", nr_allocated); + for (i = 0; i < 2000; i++) { + copy_tag_check(); + printf("%d ", i); + fflush(stdout); + } + printf("after copy_tag_check: %d allocated\n", nr_allocated); +} + +int main(void) +{ + rcu_register_thread(); + radix_tree_init(); + + regression1_test(); + regression2_test(); + regression3_test(); + single_thread_tests(); + + sleep(1); + printf("after sleep(1): %d allocated\n", nr_allocated); + rcu_unregister_thread(); + + exit(0); +} diff --git a/tools/testing/radix-tree/rcupdate.c b/tools/testing/radix-tree/rcupdate.c new file mode 100644 index 000000000000..31a2d14225d6 --- /dev/null +++ b/tools/testing/radix-tree/rcupdate.c @@ -0,0 +1,86 @@ +#include <linux/rcupdate.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +static pthread_mutex_t rculock = PTHREAD_MUTEX_INITIALIZER; +static struct rcu_head *rcuhead_global = NULL; +static __thread int nr_rcuhead = 0; +static __thread struct rcu_head *rcuhead = NULL; +static __thread struct rcu_head *rcutail = NULL; + +static pthread_cond_t rcu_worker_cond = PTHREAD_COND_INITIALIZER; + +/* switch to urcu implementation when it is merged. 
*/ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)) +{ + head->func = func; + head->next = rcuhead; + rcuhead = head; + if (!rcutail) + rcutail = head; + nr_rcuhead++; + if (nr_rcuhead >= 1000) { + int signal = 0; + + pthread_mutex_lock(&rculock); + if (!rcuhead_global) + signal = 1; + rcutail->next = rcuhead_global; + rcuhead_global = head; + pthread_mutex_unlock(&rculock); + + nr_rcuhead = 0; + rcuhead = NULL; + rcutail = NULL; + + if (signal) { + pthread_cond_signal(&rcu_worker_cond); + } + } +} + +static void *rcu_worker(void *arg) +{ + struct rcu_head *r; + + rcupdate_thread_init(); + + while (1) { + pthread_mutex_lock(&rculock); + while (!rcuhead_global) { + pthread_cond_wait(&rcu_worker_cond, &rculock); + } + r = rcuhead_global; + rcuhead_global = NULL; + + pthread_mutex_unlock(&rculock); + + synchronize_rcu(); + + while (r) { + struct rcu_head *tmp = r->next; + r->func(r); + r = tmp; + } + } + + rcupdate_thread_exit(); + + return NULL; +} + +static pthread_t worker_thread; +void rcupdate_init(void) +{ + pthread_create(&worker_thread, NULL, rcu_worker, NULL); +} + +void rcupdate_thread_init(void) +{ + rcu_register_thread(); +} +void rcupdate_thread_exit(void) +{ + rcu_unregister_thread(); +} diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h new file mode 100644 index 000000000000..e018c4816688 --- /dev/null +++ b/tools/testing/radix-tree/regression.h @@ -0,0 +1,8 @@ +#ifndef __REGRESSION_H__ +#define __REGRESSION_H__ + +void regression1_test(void); +void regression2_test(void); +void regression3_test(void); + +#endif diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c new file mode 100644 index 000000000000..2d03a63bb79c --- /dev/null +++ b/tools/testing/radix-tree/regression1.c @@ -0,0 +1,220 @@ +/* + * Regression1 + * Description: + * Salman Qazi describes the following radix-tree bug: + * + * In the following case, we can get a deadlock: + * + * 
0. The radix tree contains two items, one of which has the index 0. + * 1. The reader (in this case find_get_pages) takes the rcu_read_lock. + * 2. The reader acquires slot(s) for item(s) including the index 0 item. + * 3. The non-zero index item is deleted, and as a consequence the other item + * is moved to the root of the tree. The place where it used to be is queued + * for deletion after the readers finish. + * 3b. The zero item is deleted, removing it from the direct slot; it remains in + * the rcu-delayed indirect node. + * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref + * count. + * 5. The reader looks at it again, hoping that the item will either be freed + * or the ref count will increase. This never happens, as the slot it is + * looking at will never be updated. Also, this slot can never be reclaimed + * because the reader is holding rcu_read_lock and is in an infinite loop. + * + * The fix is to turn the same "indirect" pointer case that requires a slot + * lookup retry into a general "retry the lookup" bit. + * + * Running: + * This test should run to completion in a few seconds. The above bug would + * cause it to hang indefinitely. 
+ * + * Upstream commit: + * Not yet + */ +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <linux/rcupdate.h> +#include <stdlib.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +#include "regression.h" + +static RADIX_TREE(mt_tree, GFP_KERNEL); +static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER; /* initialised statically: nothing calls pthread_mutex_init() on it */ + +struct page { + pthread_mutex_t lock; + struct rcu_head rcu; + int count; + unsigned long index; +}; + +static struct page *page_alloc(void) +{ + struct page *p; + p = malloc(sizeof(struct page)); + p->count = 1; + p->index = 1; + pthread_mutex_init(&p->lock, NULL); + + return p; +} + +static void page_rcu_free(struct rcu_head *rcu) +{ + struct page *p = container_of(rcu, struct page, rcu); + assert(!p->count); + pthread_mutex_destroy(&p->lock); + free(p); +} + +static void page_free(struct page *p) +{ + call_rcu(&p->rcu, page_rcu_free); +} + +static unsigned find_get_pages(unsigned long start, + unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mt_tree, + (void ***)pages, NULL, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + assert((start | i) == 0); + goto restart; + } + /* + * No exceptional entries are inserted in this test. + */ + assert(0); + } + + pthread_mutex_lock(&page->lock); + if (!page->count) { + pthread_mutex_unlock(&page->lock); + goto repeat; + } + /* don't actually update page refcount */ + pthread_mutex_unlock(&page->lock); + + /* Has the page moved? 
*/ + if (unlikely(page != *((void **)pages[i]))) { + goto repeat; + } + + pages[ret] = page; + ret++; + } + rcu_read_unlock(); + return ret; +} + +static pthread_barrier_t worker_barrier; + +static void *regression1_fn(void *arg) +{ + rcu_register_thread(); + + if (pthread_barrier_wait(&worker_barrier) == + PTHREAD_BARRIER_SERIAL_THREAD) { + int j; + + for (j = 0; j < 1000000; j++) { + struct page *p; + + p = page_alloc(); + pthread_mutex_lock(&mt_lock); + radix_tree_insert(&mt_tree, 0, p); + pthread_mutex_unlock(&mt_lock); + + p = page_alloc(); + pthread_mutex_lock(&mt_lock); + radix_tree_insert(&mt_tree, 1, p); + pthread_mutex_unlock(&mt_lock); + + pthread_mutex_lock(&mt_lock); + p = radix_tree_delete(&mt_tree, 1); + pthread_mutex_lock(&p->lock); + p->count--; + pthread_mutex_unlock(&p->lock); + pthread_mutex_unlock(&mt_lock); + page_free(p); + + pthread_mutex_lock(&mt_lock); + p = radix_tree_delete(&mt_tree, 0); + pthread_mutex_lock(&p->lock); + p->count--; + pthread_mutex_unlock(&p->lock); + pthread_mutex_unlock(&mt_lock); + page_free(p); + } + } else { + int j; + + for (j = 0; j < 100000000; j++) { + struct page *pages[10]; + + find_get_pages(0, 10, pages); + } + } + + rcu_unregister_thread(); + + return NULL; +} + +static pthread_t *threads; +void regression1_test(void) +{ + int nr_threads; + int i; + long arg; + + /* Regression #1 */ + printf("running regression test 1, should finish in under a minute\n"); + nr_threads = 2; + pthread_barrier_init(&worker_barrier, NULL, nr_threads); + + threads = malloc(nr_threads * sizeof(pthread_t *)); + + for (i = 0; i < nr_threads; i++) { + arg = i; + if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) { + perror("pthread_create"); + exit(1); + } + } + + for (i = 0; i < nr_threads; i++) { + if (pthread_join(threads[i], NULL)) { + perror("pthread_join"); + exit(1); + } + } + + free(threads); + + printf("regression test 1, done\n"); +} diff --git a/tools/testing/radix-tree/regression2.c 
b/tools/testing/radix-tree/regression2.c new file mode 100644 index 000000000000..5d2fa28cdca3 --- /dev/null +++ b/tools/testing/radix-tree/regression2.c @@ -0,0 +1,126 @@ +/* + * Regression2 + * Description: + * Toshiyuki Okajima describes the following radix-tree bug: + * + * In the following case, we can get a hangup on + * radix_tree_gang_lookup_tag_slot. + * + * 0. The radix tree contains RADIX_TREE_MAP_SIZE items. And the tag of + * a certain item has PAGECACHE_TAG_DIRTY. + * 1. radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY, + * PAGECACHE_TAG_TOWRITE) is called to add PAGECACHE_TAG_TOWRITE tag + * for the tag which has PAGECACHE_TAG_DIRTY. However, there is no tag with + * PAGECACHE_TAG_DIRTY within the range from start to end. As a result, + * there is no tag with PAGECACHE_TAG_TOWRITE but the root tag has + * PAGECACHE_TAG_TOWRITE. + * 2. An item is added into the radix tree and then the level of it is + * extended into 2 from 1. At that time, the new radix tree node inherits + * the tag status of the root tag. Therefore the tag of the new radix tree + * node has PAGECACHE_TAG_TOWRITE but there is no slot with + * PAGECACHE_TAG_TOWRITE tag in the child node of the new radix tree node. + * 3. The tag of a certain item is cleared with PAGECACHE_TAG_DIRTY. + * 4. All items within the index range from 0 to RADIX_TREE_MAP_SIZE - 1 are + * released. (Only the item whose index is RADIX_TREE_MAP_SIZE exists in the + * radix tree.) As a result, the slot of the radix tree node is NULL but + * the tag which corresponds to the slot has PAGECACHE_TAG_TOWRITE. + * 5. radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls + * __lookup_tag. __lookup_tag returns with 0. And __lookup_tag doesn't + * change the index that is the input and output parameter. Because the 1st + * slot of the radix tree node is NULL, but the tag which corresponds to + * the slot has PAGECACHE_TAG_TOWRITE. 
+ * Therefore radix_tree_gang_lookup_tag_slot tries to get some items by
+ * calling __lookup_tag, but it cannot get any items forever.
+ *
+ * The fix is to change that radix_tree_tag_if_tagged doesn't tag the root tag
+ * if it doesn't set any tags within the specified range.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would cause it
+ * to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
+#else
+#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
+#endif
+
+#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
+#define PAGECACHE_TAG_DIRTY	0
+#define PAGECACHE_TAG_WRITEBACK	1
+#define PAGECACHE_TAG_TOWRITE	2
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+unsigned long page_count = 0;
+
+struct page {
+	unsigned long index;
+};
+
+/*
+ * Allocate a page and give it the next value of the global page_count.
+ * Exits the test on allocation failure instead of dereferencing NULL.
+ */
+static struct page *page_alloc(void)
+{
+	struct page *p;
+
+	p = malloc(sizeof(struct page));
+	if (!p) {
+		perror("malloc");
+		exit(1);
+	}
+	p->index = page_count++;
+
+	return p;
+}
+
+/*
+ * Entry point for regression test 2.  The numbered comments below match the
+ * numbered steps in the description at the top of this file.
+ */
+void regression2_test(void)
+{
+	int i;
+	struct page *p;
+	int max_slots = RADIX_TREE_MAP_SIZE;
+	unsigned long int start, end;
+	/*
+	 * Step 5 passes "end" (max_slots - 2) as the maximum number of
+	 * results, so give the result array room for that many entries.
+	 */
+	struct page *pages[RADIX_TREE_MAP_SIZE];
+
+	printf("running regression test 2 (should take milliseconds)\n");
+	/* 0. */
+	for (i = 0; i <= max_slots - 1; i++) {
+		p = page_alloc();
+		radix_tree_insert(&mt_tree, i, p);
+	}
+	radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+	/* 1. */
+	start = 0;
+	end = max_slots - 2;
+	radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1,
+				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+
+	/* 2. */
+	p = page_alloc();
+	radix_tree_insert(&mt_tree, max_slots, p);
+
+	/* 3. */
+	radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+	/* 4. */
+	/* radix_tree_delete() hands back the stored page; release it. */
+	for (i = max_slots - 1; i >= 0; i--)
+		free(radix_tree_delete(&mt_tree, i));
+
+	/* 5. */
+	// NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
+	//       can return.
+	start = 1;
+	end = max_slots - 2;
+	radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
+		PAGECACHE_TAG_TOWRITE);
+
+	/* We remove all the remained nodes */
+	free(radix_tree_delete(&mt_tree, max_slots));
+
+	printf("regression test 2, done\n");
+}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
new file mode 100644
index 000000000000..1f06ed73d0a8
--- /dev/null
+++ b/tools/testing/radix-tree/regression3.c
@@ -0,0 +1,117 @@
+/*
+ * Regression3
+ * Description:
+ * Helper radix_tree_iter_retry resets next_index to the current index.
+ * In following radix_tree_next_slot current chunk size becomes zero.
+ * This isn't checked and it tries to dereference null pointer in slot.
+ *
+ * Helper radix_tree_iter_next reset slot to NULL and next_index to index + 1,
+ * for tagged iteration it also must reset cached tags in iterator to abort
+ * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would
+ * cause it to segfault. 
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+/*
+ * Entry point for regression test 3.
+ *
+ * The first three loops walk the tree with the tagged, slot and contig
+ * iterators; each inserts index 1 during its first iteration and calls
+ * radix_tree_iter_retry() whenever radix_tree_deref_retry() says the slot
+ * must be retried.  The last three loops walk a tree holding index 0 and 1
+ * and call radix_tree_iter_next() while positioned on index 0.
+ */
+void regression3_test(void)
+{
+	RADIX_TREE(root, GFP_KERNEL);
+	/* Dummy entries: only the pointer values are stored, never dereferenced here. */
+	void *ptr0 = (void *)4ul;
+	void *ptr = (void *)8ul;
+	struct radix_tree_iter iter;
+	void **slot;
+	bool first;
+
+	printf("running regression test 3 (should take milliseconds)\n");
+
+	radix_tree_insert(&root, 0, ptr0);
+	radix_tree_tag_set(&root, 0, 0);
+
+	/* Retry path, tagged iteration. */
+	first = true;
+	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+		printf("tagged %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			radix_tree_tag_set(&root, 1, 0);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printf("retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+	radix_tree_delete(&root, 1);
+
+	/* Retry path, plain slot iteration. */
+	first = true;
+	radix_tree_for_each_slot(slot, &root, &iter, 0) {
+		printf("slot %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printk("retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+	radix_tree_delete(&root, 1);
+
+	/* Retry path, contiguous iteration; index 1 stays in the tree afterwards. */
+	first = true;
+	radix_tree_for_each_contig(slot, &root, &iter, 0) {
+		printk("contig %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printk("retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+
+	/* radix_tree_iter_next() path, plain slot iteration. */
+	radix_tree_for_each_slot(slot, &root, &iter, 0) {
+		printf("slot %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printf("next at %ld\n", iter.index);
+			slot = radix_tree_iter_next(&iter);
+		}
+	}
+
+	/* radix_tree_iter_next() path, contiguous iteration. */
+	radix_tree_for_each_contig(slot, &root, &iter, 0) {
+		printf("contig %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printf("next at %ld\n", iter.index);
+			slot = radix_tree_iter_next(&iter);
+		}
+	}
+
+	/* radix_tree_iter_next() path, tagged iteration over both entries. */
+	radix_tree_tag_set(&root, 0, 0);
+	radix_tree_tag_set(&root, 1, 0);
+	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+		printf("tagged %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printf("next at %ld\n", iter.index);
+			slot = radix_tree_iter_next(&iter);
+		}
+	}
+
+	radix_tree_delete(&root, 0);
+	radix_tree_delete(&root, 1);
+
+	printf("regression test 3 passed\n");
+}
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
new file mode 100644
index 000000000000..83136be552a0
--- /dev/null
+++ b/tools/testing/radix-tree/tag_check.c
@@ -0,0 +1,332 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+
+
+/*
+ * Exercise one index/tag pair on a tree where that index starts out absent:
+ * a freshly inserted item must be untagged, a set tag must read back as set,
+ * and deleting then re-inserting the item must leave it untagged again.
+ * The index is absent again when this returns.
+ */
+static void
+__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
+{
+	int ret;
+
+	item_check_absent(tree, index);
+	assert(item_tag_get(tree, index, tag) == 0);
+
+	item_insert(tree, index);
+	assert(item_tag_get(tree, index, tag) == 0);
+	item_tag_set(tree, index, tag);
+	ret = item_tag_get(tree, index, tag);
+	assert(ret != 0);
+	ret = item_delete(tree, index);
+	assert(ret != 0);
+	item_insert(tree, index);
+	ret = item_tag_get(tree, index, tag);
+	assert(ret == 0);
+	ret = item_delete(tree, index);
+	assert(ret != 0);
+	/* A second delete of the same index must report nothing to delete. */
+	ret = item_delete(tree, index);
+	assert(ret == 0);
+}
+
+/*
+ * Run __simple_checks() for tags 0 and 1 on indices 0..9999, then verify tag
+ * consistency and tear the tree down, printing nr_allocated before and after.
+ */
+void simple_checks(void)
+{
+	unsigned long index;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	for (index = 0; index < 10000; index++) {
+		__simple_checks(&tree, index, 0);
+		__simple_checks(&tree, index, 1);
+	}
+	verify_tag_consistency(&tree, 0);
+	verify_tag_consistency(&tree, 1);
+	printf("before item_kill_tree: %d allocated\n", nr_allocated);
+	item_kill_tree(&tree);
+	printf("after item_kill_tree: %d allocated\n", nr_allocated);
+}
+
+/*
+ * Check that tags propagate correctly when extending a tree. 
+ */
+static void extend_checks(void)
+{
+	/* The two item indices and the tag used throughout, named once. */
+	const unsigned long low = 43;
+	const unsigned long high = 1000000;
+	const int tag = 0;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	/* Tag the low item, then add a far larger index next to it. */
+	item_insert(&tree, low);
+	assert(item_tag_get(&tree, low, tag) == 0);
+	item_tag_set(&tree, low, tag);
+	assert(item_tag_get(&tree, low, tag) == 1);
+	item_insert(&tree, high);
+	assert(item_tag_get(&tree, low, tag) == 1);
+
+	/* Add a tagged item at index 0, then remove the large and low items. */
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, tag);
+	item_delete(&tree, high);
+	assert(item_tag_get(&tree, low, tag) != 0);
+	item_delete(&tree, low);
+	assert(item_tag_get(&tree, low, tag) == 0);	/* crash */
+	assert(item_tag_get(&tree, 0, tag) == 1);
+
+	verify_tag_consistency(&tree, tag);
+
+	item_kill_tree(&tree);
+}
+
+/*
+ * Check that tags propagate correctly when contracting a tree.
+ */
+static void contract_checks(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	/* Two neighbouring indices starting at 1 << RADIX_TREE_MAP_SHIFT. */
+	const int first = 1<<RADIX_TREE_MAP_SHIFT;
+	const int second = first+1;
+	struct item *found;
+
+	item_insert(&tree, first);
+	item_insert(&tree, second);
+
+	/* first carries tags 0 and 1, second carries tag 0 only. */
+	item_tag_set(&tree, first, 0);
+	item_tag_set(&tree, first, 1);
+	item_tag_set(&tree, second, 0);
+
+	/* Drop the second item, then clear tag 1 on the remaining one. */
+	item_delete(&tree, second);
+	item_tag_clear(&tree, first, 1);
+
+	/* Exactly one item is found for tag 0 and none for tag 1. */
+	assert(radix_tree_gang_lookup_tag(&tree, (void **)&found, 0, 1, 0) == 1);
+	assert(radix_tree_gang_lookup_tag(&tree, (void **)&found, 0, 1, 1) == 0);
+
+	assert(item_tag_get(&tree, first, 0) == 1);
+	assert(item_tag_get(&tree, first, 1) == 0);
+
+	verify_tag_consistency(&tree, 0);
+	item_kill_tree(&tree);
+}
+
+/*
+ * Stupid tag thrasher
+ *
+ * Create a large linear array corresponding to the tree. 
Each element in
+ * the array is coherent with each node in the tree
+ */
+
+/* Expected state of one index, as recorded in the thrash_state array. */
+enum {
+	NODE_ABSENT = 0,
+	NODE_PRESENT = 1,
+	NODE_TAGGED = 2,
+};
+
+/*
+ * Parenthesised so that "rand() % THRASH_SIZE" reduces modulo 1000000;
+ * without the parentheses it expands to "(rand() % 1000) * 1000".
+ */
+#define THRASH_SIZE		(1000 * 1000)
+#define N 127
+#define BATCH	33
+
+/*
+ * Walk every item carrying the given tag, BATCH items per lookup, and check
+ * that thrash_state marks exactly the returned indices as NODE_TAGGED up to
+ * the last item found.
+ */
+static void gang_check(struct radix_tree_root *tree,
+			char *thrash_state, int tag)
+{
+	struct item *items[BATCH];
+	int nr_found;
+	unsigned long index = 0;
+	unsigned long last_index = 0;
+
+	while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
+					index, BATCH, tag))) {
+		int i;
+
+		for (i = 0; i < nr_found; i++) {
+			struct item *item = items[i];
+
+			/* Everything skipped over must not be tagged. */
+			while (last_index < item->index) {
+				assert(thrash_state[last_index] != NODE_TAGGED);
+				last_index++;
+			}
+			assert(thrash_state[last_index] == NODE_TAGGED);
+			last_index++;
+		}
+		index = items[nr_found - 1]->index + 1;
+	}
+}
+
+/*
+ * Thrash the tree with random inserts, deletes, tag sets and tag clears,
+ * mirroring every change in thrash_state.  The four chunk sizes each step
+ * through 1, N, N*N, ... below THRASH_SIZE; after every combination the
+ * whole index range and the tagged gang lookup are checked against
+ * thrash_state and a summary line is printed.
+ */
+static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
+{
+	int insert_chunk;
+	int delete_chunk;
+	int tag_chunk;
+	int untag_chunk;
+	int total_tagged = 0;
+	int total_present = 0;
+
+	for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N)
+	for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N)
+	for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N)
+	for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) {
+		int i;
+		unsigned long index;
+		int nr_inserted = 0;
+		int nr_deleted = 0;
+		int nr_tagged = 0;
+		int nr_untagged = 0;
+		int actual_total_tagged;
+		int actual_total_present;
+
+		/* Insert at random indices that are currently absent. */
+		for (i = 0; i < insert_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_ABSENT)
+				continue;
+			item_check_absent(tree, index);
+			item_insert(tree, index);
+			assert(thrash_state[index] != NODE_PRESENT);
+			thrash_state[index] = NODE_PRESENT;
+			nr_inserted++;
+			total_present++;
+		}
+
+		/* Delete at random indices that are currently present. */
+		for (i = 0; i < delete_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] == NODE_ABSENT)
+				continue;
+			item_check_present(tree, index);
+			if (item_tag_get(tree, index, tag)) {
+				assert(thrash_state[index] == NODE_TAGGED);
+				total_tagged--;
+			} else {
+				assert(thrash_state[index] == NODE_PRESENT);
+			}
+			item_delete(tree, index);
+			assert(thrash_state[index] != NODE_ABSENT);
+			thrash_state[index] = NODE_ABSENT;
+			nr_deleted++;
+			total_present--;
+		}
+
+		/* Tag random present-but-untagged indices (set twice on purpose). */
+		for (i = 0; i < tag_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_PRESENT) {
+				if (item_lookup(tree, index))
+					assert(item_tag_get(tree, index, tag));
+				continue;
+			}
+			item_tag_set(tree, index, tag);
+			item_tag_set(tree, index, tag);
+			assert(thrash_state[index] != NODE_TAGGED);
+			thrash_state[index] = NODE_TAGGED;
+			nr_tagged++;
+			total_tagged++;
+		}
+
+		/* Untag random tagged indices (cleared twice on purpose). */
+		for (i = 0; i < untag_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_TAGGED)
+				continue;
+			item_check_present(tree, index);
+			assert(item_tag_get(tree, index, tag));
+			item_tag_clear(tree, index, tag);
+			item_tag_clear(tree, index, tag);
+			assert(thrash_state[index] != NODE_PRESENT);
+			thrash_state[index] = NODE_PRESENT;
+			nr_untagged++;
+			total_tagged--;
+		}
+
+		/* Compare the tree against thrash_state for every index. */
+		actual_total_tagged = 0;
+		actual_total_present = 0;
+		for (index = 0; index < THRASH_SIZE; index++) {
+			switch (thrash_state[index]) {
+			case NODE_ABSENT:
+				item_check_absent(tree, index);
+				break;
+			case NODE_PRESENT:
+				item_check_present(tree, index);
+				assert(!item_tag_get(tree, index, tag));
+				actual_total_present++;
+				break;
+			case NODE_TAGGED:
+				item_check_present(tree, index);
+				assert(item_tag_get(tree, index, tag));
+				actual_total_present++;
+				actual_total_tagged++;
+				break;
+			}
+		}
+
+		gang_check(tree, thrash_state, tag);
+
+		printf("%d(%d) %d(%d) %d(%d) %d(%d) / "
+				"%d(%d) present, %d(%d) tagged\n",
+			insert_chunk, nr_inserted,
+			delete_chunk, nr_deleted,
+			tag_chunk, nr_tagged,
+			untag_chunk, nr_untagged,
+			total_present, actual_total_present,
+			total_tagged, actual_total_tagged);
+	}
+}
+
+/*
+ * Run do_thrash() on tag 0 with a zeroed THRASH_SIZE-byte state array, then
+ * verify tag consistency and free the tree and the array.
+ */
+static void thrash_tags(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	char *thrash_state;
+
+	/* calloc() zeroes the array, i.e. every index starts as NODE_ABSENT. */
+	thrash_state = calloc(THRASH_SIZE, 1);
+	if (!thrash_state) {
+		perror("calloc");
+		exit(1);
+	}
+
+	do_thrash(&tree, thrash_state, 0);
+
+	verify_tag_consistency(&tree, 0);
+	item_kill_tree(&tree);
+	free(thrash_state);
+}
+
+/* Insert and delete one large index, then tear the tree down. */
+static void leak_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 1000000);
+	item_delete(&tree, 1000000);
+	item_kill_tree(&tree);
+}
+
+/* Same steps as leak_check(), printing nr_allocated after each one. */
+static void __leak_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_insert(&tree, 1000000);
+	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_delete(&tree, 1000000);
+	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_kill_tree(&tree);
+	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+}
+
+/*
+ * A tree holding only a tagged item at index 0: a tagged lookup from 0 finds
+ * one item, a tagged lookup from 1 finds none.
+ */
+static void single_check(void)
+{
+	struct item *items[BATCH];
+	RADIX_TREE(tree, GFP_KERNEL);
+	int ret;
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+	assert(ret == 1);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
+	assert(ret == 0);
+	verify_tag_consistency(&tree, 0);
+	verify_tag_consistency(&tree, 1);
+	item_kill_tree(&tree);
+}
+
+/* Run all tag checks in order, printing nr_allocated between groups. */
+void tag_check(void)
+{
+	single_check();
+	extend_checks();
+	contract_checks();
+	printf("after extend_checks: %d allocated\n", nr_allocated);
+	__leak_check();
+	leak_check();
+	printf("after leak_check: %d allocated\n", nr_allocated);
+	simple_checks();
+	printf("after simple_checks: %d allocated\n", nr_allocated);
+	thrash_tags();
+	printf("after thrash_tags: %d allocated\n", nr_allocated);
+}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
new file mode 100644
index 000000000000..2bebf34cdc27
--- /dev/null
+++ b/tools/testing/radix-tree/test.c
@@ -0,0 +1,219 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "test.h"
+
+struct item * 
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_set(root, index, tag);
+}
+
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_clear(root, index, tag);
+}
+
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_get(root, index, tag);
+}
+
+/* Insert an existing item at item->index; returns radix_tree_insert()'s result. */
+int __item_insert(struct radix_tree_root *root, struct item *item)
+{
+	return radix_tree_insert(root, item->index, item);
+}
+
+/*
+ * Create an item for index and insert it.  Returns the result of the
+ * insertion; when that is non-zero the new item is freed instead of leaked.
+ */
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item = item_create(index);
+	int err = __item_insert(root, item);
+
+	if (err)
+		free(item);
+	return err;
+}
+
+/* Delete and free the item at index.  Returns 1 if one was there, else 0. */
+int item_delete(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item = radix_tree_delete(root, index);
+
+	if (item) {
+		assert(item->index == index);
+		free(item);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Allocate an item recording its own index.
+ * Exits the test on allocation failure instead of dereferencing NULL.
+ */
+struct item *item_create(unsigned long index)
+{
+	struct item *ret = malloc(sizeof(*ret));
+
+	if (!ret) {
+		perror("malloc");
+		exit(1);
+	}
+	ret->index = index;
+	return ret;
+}
+
+/* Assert that an item exists at index and records that same index. */
+void item_check_present(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item;
+
+	item = radix_tree_lookup(root, index);
+	assert(item != 0);
+	assert(item->index == index);
+}
+
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index)
+{
+	return radix_tree_lookup(root, index);
+}
+
+/* Assert that nothing is stored at index. */
+void item_check_absent(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item;
+
+	item = radix_tree_lookup(root, index);
+	assert(item == 0);
+}
+
+/*
+ * Scan only the passed [start, start+nr) for present items.
+ *
+ * Looks up at most chunk items at a time, advancing the lookup start by hop
+ * each round, and asserts that every lookup returns consecutive indices.
+ */
+void item_gang_check_present(struct radix_tree_root *root,
+			unsigned long start, unsigned long nr,
+			int chunk, int hop)
+{
+	struct item *items[chunk];
+	unsigned long into;
+
+	for (into = 0; into < nr; ) {
+		int nfound;
+		int nr_to_find = chunk;
+		int i;
+
+		/* Do not look past the end of the range. */
+		if (nr_to_find > (nr - into))
+			nr_to_find = nr - into;
+
+		nfound = radix_tree_gang_lookup(root, (void **)items,
+						start + into, nr_to_find);
+		assert(nfound == nr_to_find);
+		for (i = 0; i < nfound; i++)
+			assert(items[i]->index == start + into + i);
+		into += hop;
+	}
+}
+
+/*
+ * Scan the entire tree, only expecting present items [start, start+nr)
+ */
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+			unsigned long nr, int chunk)
+{
+	struct item *items[chunk];
+	unsigned long into = 0;
+	unsigned long this_index = start;
+	int nfound;
+	int i;
+
+//	printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk);
+
+	while ((nfound = radix_tree_gang_lookup(root, (void **)items, into,
+					chunk))) {
+//		printf("At 0x%08lx, nfound=%d\n", into, nfound);
+		for (i = 0; i < nfound; i++) {
+			assert(items[i]->index == this_index);
+			this_index++;
+		}
+//		printf("Found 0x%08lx->0x%08lx\n",
+//			items[0]->index, items[nfound-1]->index);
+		into = this_index;
+	}
+	if (chunk)
+		assert(this_index == start + nr);
+	/* Nothing may be stored at or beyond the end of the range. */
+	nfound = radix_tree_gang_lookup(root, (void **)items,
+					this_index, chunk);
+	assert(nfound == 0);
+}
+
+/* Print the tag words of every tag of one node, one line per tag. */
+static void dump_node_tags(struct radix_tree_node *node)
+{
+	int t;
+	int w;
+
+	for (t = 0; t < RADIX_TREE_MAX_TAGS; t++) {
+		printf("tag %d: ", t);
+		for (w = 0; w < RADIX_TREE_TAG_LONGS; w++)
+			printf("%016lx ", node->tags[t][w]);
+		printf("\n");
+	}
+}
+
+/*
+ * Check that this node has a bit set for tag exactly when its parent says
+ * so (tagged), then recurse into every populated slot while height > 1.
+ * Returns 0 when consistent; on a mismatch dumps the node's tags and
+ * returns 1.
+ */
+static int verify_node(struct radix_tree_node *slot, unsigned int tag,
+			unsigned int height, int tagged)
+{
+	int anyset = 0;
+	int i;
+
+	slot = indirect_to_ptr(slot);
+
+	/* Verify consistency at this level */
+	for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) {
+		if (slot->tags[tag][i]) {
+			anyset = 1;
+			break;
+		}
+	}
+	if (tagged != anyset) {
+		printf("tag: %u, height %u, tagged: %d, anyset: %d\n", tag, height, tagged, anyset);
+		dump_node_tags(slot);
+		return 1;
+	}
+	assert(tagged == anyset);
+
+	/* Go for next level */
+	if (height > 1) {
+		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+			if (slot->slots[i])
+				if (verify_node(slot->slots[i], tag, height - 1,
+					    !!test_bit(i, slot->tags[tag]))) {
+					printf("Failure at off %d\n", i);
+					dump_node_tags(slot);
+					return 1;
+				}
+	}
+	return 0;
+}
+
+/*
+ * Run verify_node() from the root for one tag; a tree of height 0 is skipped.
+ * NOTE(review): verify_node()'s result is discarded, so a mismatch is only
+ * printed - confirm that this is intended.
+ */
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
+{
+	if (!root->height)
+		return;
+	verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag));
+}
+
+/* Delete and free every item, 32 per lookup, and assert the tree is empty. */
+void item_kill_tree(struct radix_tree_root *root)
+{
+	struct item *items[32];
+	int nfound;
+
+	while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
+		int i;
+
+		for (i = 0; i < nfound; i++) {
+			void *ret;
+
+			ret = radix_tree_delete(root, items[i]->index);
+			assert(ret == items[i]);
+			free(items[i]);
+		}
+	}
+	assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
+	assert(root->rnode == NULL);
+}
+
+/*
+ * Assert that the current height can address maxindex and that, per the
+ * checks below, one level less would not suffice.
+ */
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
+{
+	assert(radix_tree_maxindex(root->height) >= maxindex);
+	if (root->height > 1)
+		assert(radix_tree_maxindex(root->height-1) < maxindex);
+	else if (root->height == 1)
+		assert(radix_tree_maxindex(root->height-1) <= maxindex);
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
new file mode 100644
index 000000000000..4e1d95faaa94
--- /dev/null
+++ b/tools/testing/radix-tree/test.h
@@ -0,0 +1,40 @@
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+
+struct item {
+	unsigned long index;
+};
+
+struct item *item_create(unsigned long index);
+int __item_insert(struct radix_tree_root *root, struct item *item);
+int item_insert(struct radix_tree_root *root, unsigned long index);
+int item_delete(struct radix_tree_root *root, unsigned long index);
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
+
+void item_check_present(struct radix_tree_root *root, unsigned long index);
+void item_check_absent(struct radix_tree_root *root, unsigned long index);
+void item_gang_check_present(struct radix_tree_root *root,
+			unsigned long start, unsigned long 
nr, + int chunk, int hop); +void item_full_scan(struct radix_tree_root *root, unsigned long start, + unsigned long nr, int chunk); +void item_kill_tree(struct radix_tree_root *root); + +void tag_check(void); + +struct item * +item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); +struct item * +item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag); +int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag); +void tree_verify_min_height(struct radix_tree_root *root, int maxindex); +void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); + +extern int nr_allocated; + +/* Normally private parts of lib/radix-tree.c */ +void *indirect_to_ptr(void *ptr); +int root_tag_get(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_maxindex(unsigned int height); diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore index 9b3193d06608..a23bb4a6f06c 100644 --- a/tools/testing/selftests/breakpoints/.gitignore +++ b/tools/testing/selftests/breakpoints/.gitignore @@ -1 +1,2 @@ breakpoint_test +step_after_suspend_test diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index c0d957015f52..74e533fd4bc5 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -6,9 +6,11 @@ ifeq ($(ARCH),x86) TEST_PROGS := breakpoint_test endif +TEST_PROGS += step_after_suspend_test + all: $(TEST_PROGS) include ../lib.mk clean: - rm -fr breakpoint_test + rm -fr breakpoint_test step_after_suspend_test diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c new file mode 100644 index 000000000000..60b8a95dac26 --- /dev/null +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2016 Google, Inc. 
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+/*
+ * Child side of the test: pin to the given CPU, ask to be traced, stop with
+ * SIGSTOP so the parent can take over, then exit 0.  Never returns; any
+ * failure exits with status 1.
+ */
+void child(int cpu)
+{
+	cpu_set_t set;
+
+	CPU_ZERO(&set);
+	CPU_SET(cpu, &set);
+	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
+		perror("sched_setaffinity() failed");
+		_exit(1);
+	}
+
+	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+		perror("ptrace(PTRACE_TRACEME) failed");
+		_exit(1);
+	}
+
+	if (raise(SIGSTOP) != 0) {
+		perror("raise(SIGSTOP) failed");
+		_exit(1);
+	}
+
+	_exit(0);
+}
+
+/*
+ * Fork a child pinned to cpu, single-step it once and let it run to exit.
+ * Returns true when the child stopped with SIGSTOP, then stopped with
+ * SIGTRAP after PTRACE_SINGLESTEP, then exited after PTRACE_CONT.
+ * Exits the whole test as skipped when PTRACE_SINGLESTEP fails with EIO.
+ */
+bool run_test(int cpu)
+{
+	int status;
+	pid_t pid = fork();
+	pid_t wpid;
+
+	if (pid < 0) {
+		perror("fork() failed");
+		return false;
+	}
+	if (pid == 0)
+		child(cpu);
+
+	/* Wait for the child's initial SIGSTOP. */
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		perror("waitpid() failed");
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		printf("child did not stop\n");
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGSTOP) {
+		printf("child did not stop with SIGSTOP\n");
+		return false;
+	}
+
+	if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
+		if (errno == EIO) {
+			printf("ptrace(PTRACE_SINGLESTEP) not supported on this architecture\n");
+			ksft_exit_skip();
+		}
+		perror("ptrace(PTRACE_SINGLESTEP) failed");
+		return false;
+	}
+
+	/* The single step must end in a SIGTRAP stop, not in an exit. */
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		perror("waitpid() failed");
+		return false;
+	}
+	if (WIFEXITED(status)) {
+		printf("child did not single-step\n");
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		printf("child did not stop\n");
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGTRAP) {
+		printf("child did not stop with SIGTRAP\n");
+		return false;
+	}
+
+	if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+		perror("ptrace(PTRACE_CONT) failed");
+		return false;
+	}
+
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		perror("waitpid() failed");
+		return false;
+	}
+	if (!WIFEXITED(status)) {
+		printf("child did not exit after PTRACE_CONT\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Arm a 5 second CLOCK_BOOTTIME_ALARM timer and write "mem" to
+ * /sys/power/state.  Any failure ends the test via ksft_exit_fail().
+ */
+void suspend(void)
+{
+	static const char mem_state[] = "mem";
+	int power_state_fd;
+	int timerfd;
+	int err;
+	ssize_t written;
+	struct itimerspec spec = {};
+
+	power_state_fd = open("/sys/power/state", O_RDWR);
+	if (power_state_fd < 0) {
+		perror("open(\"/sys/power/state\") failed (is this test running as root?)");
+		ksft_exit_fail();
+	}
+
+	timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
+	if (timerfd < 0) {
+		perror("timerfd_create() failed");
+		ksft_exit_fail();
+	}
+
+	spec.it_value.tv_sec = 5;
+	err = timerfd_settime(timerfd, 0, &spec, NULL);
+	if (err < 0) {
+		perror("timerfd_settime() failed");
+		ksft_exit_fail();
+	}
+
+	/* Compare as ssize_t so a -1 error return is not converted to unsigned. */
+	written = write(power_state_fd, mem_state, strlen(mem_state));
+	if (written != (ssize_t)strlen(mem_state)) {
+		perror("entering suspend failed");
+		ksft_exit_fail();
+	}
+
+	close(timerfd);
+	close(power_state_fd);
+}
+
+/*
+ * Optionally suspend (skipped with -n), then run run_test() on every CPU in
+ * this process's affinity mask and report one pass/fail result per CPU.
+ */
+int main(int argc, char **argv)
+{
+	int opt;
+	bool do_suspend = true;
+	bool succeeded = true;
+	cpu_set_t available_cpus;
+	int err;
+	int cpu;
+
+	while ((opt = getopt(argc, argv, "n")) != -1) {
+		switch (opt) {
+		case 'n':
+			do_suspend = false;
+			break;
+		default:
+			printf("Usage: %s [-n]\n", argv[0]);
+			printf("        -n: do not trigger a suspend/resume cycle before the test\n");
+			return -1;
+		}
+	}
+
+	if (do_suspend)
+		suspend();
+
+	err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+	if (err < 0) {
+		perror("sched_getaffinity() failed");
+		ksft_exit_fail();
+	}
+
+	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+		bool test_success;
+
+		if (!CPU_ISSET(cpu, &available_cpus))
+			continue;
+
+		test_success = run_test(cpu);
+		printf("CPU %d: ", cpu);
+		if (test_success) {
+			printf("[OK]\n");
+			ksft_inc_pass_cnt();
+		} else {
+			printf("[FAILED]\n");
+			ksft_inc_fail_cnt();
+			succeeded = false;
+		}
+	}
+
+	ksft_print_cnts();
+	if (succeeded)
+		ksft_exit_pass();
+	else
+		ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
new file mode 100644
index 000000000000..e6ab090cfbf3
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/config
@@ -0,0 +1,2 @@
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
new file mode 100644
index 000000000000..c8137f70e291
--- /dev/null
+++ b/tools/testing/selftests/firmware/config
@@ -0,0 +1 @@
+CONFIG_TEST_FIRMWARE=y
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
new file mode 100644
index 000000000000..ef8214661612
--- /dev/null
+++ b/tools/testing/selftests/ftrace/config
@@ -0,0 +1 @@
+CONFIG_FTRACE=y
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
new file mode 100644
index 000000000000..84b66a3c1f74
--- /dev/null
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -0,0 +1 @@
+msgque_test
diff --git a/tools/testing/selftests/ipc/config b/tools/testing/selftests/ipc/config
new file mode 100644
index 000000000000..0702447109f5
--- /dev/null
+++ b/tools/testing/selftests/ipc/config
@@ -0,0 +1,2 @@
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 47147b968514..08360060ab14 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@ 
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh +TEST_PROGS := printf.sh bitmap.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh new file mode 100755 index 000000000000..2da187b6ddad --- /dev/null +++ b/tools/testing/selftests/lib/bitmap.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs bitmap infrastructure tests using test_bitmap kernel module + +if /sbin/modprobe -q test_bitmap; then + /sbin/modprobe -q -r test_bitmap + echo "bitmap: ok" +else + echo "bitmap: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore new file mode 100644 index 000000000000..1c0711708b98 --- /dev/null +++ b/tools/testing/selftests/media_tests/.gitignore @@ -0,0 +1 @@ +media_device_test diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile new file mode 100644 index 000000000000..7071bcc1d066 --- /dev/null +++ b/tools/testing/selftests/media_tests/Makefile @@ -0,0 +1,7 @@ +TEST_PROGS := media_device_test +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr media_device_test diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c new file mode 100644 index 000000000000..cbf53a032ab5 --- /dev/null +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -0,0 +1,95 @@ +/* + * media_device_test.c - Media Controller Device Kref API Test + * + * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> + * Copyright (c) 2016 Samsung Electronics Co., Ltd. + * + * This file is released under the GPLv2. + */ + +/* + * This file adds a test for the Media Controller API. + * This test should be run as root and should not be + * included in the Kselftest run. This test should be + * run when hardware and a driver that make use of the Media + * Controller API are present in the system. 
+ * + * This test opens user specified Media Device and calls + * MEDIA_IOC_DEVICE_INFO ioctl in a loop once every 10 + * seconds. + * + * Usage: + * sudo ./media_device_test -d /dev/mediaX + * + * While test is running, remove the device and + * ensure there are no use after free errors and + * other Oops in the dmesg. Enable KaSan kernel + * config option for use-after-free error detection. +*/ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <linux/media.h> + +int main(int argc, char **argv) +{ + int opt; + char media_device[256]; + int count = 0; + struct media_device_info mdi; + int ret; + int fd; + + if (argc < 2) { + printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]); + exit(-1); + } + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d:")) != -1) { + switch (opt) { + case 'd': + strncpy(media_device, optarg, sizeof(media_device) - 1); + media_device[sizeof(media_device)-1] = '\0'; + break; + default: + printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]); + exit(-1); + } + } + + if (getuid() != 0) { + printf("Please run the test as root - Exiting.\n"); + exit(-1); + } + + /* Open Media device and keep it open */ + fd = open(media_device, O_RDWR); + if (fd == -1) { + printf("Media Device open errno %s\n", strerror(errno)); + exit(-1); + } + + printf("\nNote:\n" + "While test is running, remove the device and\n" + "ensure there are no use after free errors and\n" + "other Oops in the dmesg. 
Enable KaSan kernel\n" + "config option for use-after-free error detection.\n\n"); + + while (count < 100) { + ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi); + if (ret < 0) + printf("Media Device Info errno %s\n", strerror(errno)); + else + printf("Media device model %s driver %s\n", + mdi.model, mdi.driver); + sleep(10); + count++; + } +} diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config new file mode 100644 index 000000000000..2fde30191a47 --- /dev/null +++ b/tools/testing/selftests/memory-hotplug/config @@ -0,0 +1,4 @@ +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_SPARSE=y +CONFIG_NOTIFIER_ERROR_INJECTION=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config new file mode 100644 index 000000000000..b5d881e48548 --- /dev/null +++ b/tools/testing/selftests/mount/config @@ -0,0 +1,2 @@ +CONFIG_USER_NS=y +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 6fb23366b258..69bb3fc38fb2 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,3 +2,4 @@ socket psock_fanout psock_tpacket reuseport_bpf +reuseport_bpf_cpu diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 41449b5ad0a9..c658792d47b4 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf +NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu all: $(NET_PROGS) %: %.c diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config new file mode 100644 index 000000000000..e57b4ac40e72 --- /dev/null +++ b/tools/testing/selftests/net/config @@ -0,0 +1,3 @@ +CONFIG_USER_NS=y +CONFIG_BPF_SYSCALL=y 
+CONFIG_TEST_BPF=m diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index bec1b5dd2530..96ba386b1b7b 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -9,10 +9,12 @@ #include <errno.h> #include <error.h> +#include <fcntl.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/unistd.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -169,9 +171,15 @@ static void build_recv_group(const struct test_params p, int fd[], uint16_t mod, if (bind(fd[i], addr, sockaddr_size())) error(1, errno, "failed to bind recv socket %d", i); - if (p.protocol == SOCK_STREAM) + if (p.protocol == SOCK_STREAM) { + opt = 4; + if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt, + sizeof(opt))) + error(1, errno, + "failed to set TCP_FASTOPEN on %d", i); if (listen(fd[i], p.recv_socks * 10)) error(1, errno, "failed to listen on socket"); + } } free(addr); } @@ -189,10 +197,8 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); - if (connect(fd, daddr, sockaddr_size())) - error(1, errno, "failed to connect"); - if (send(fd, buf, len, 0) < 0) + if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0) error(1, errno, "failed to send message"); close(fd); @@ -260,7 +266,7 @@ static void test_recv_order(const struct test_params p, int fd[], int mod) } } -static void test_reuseport_ebpf(const struct test_params p) +static void test_reuseport_ebpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -268,6 +274,7 @@ static void test_reuseport_ebpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_ebpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_ebpf(fd[0], 
p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -276,7 +283,7 @@ static void test_reuseport_ebpf(const struct test_params p) close(fd[i]); } -static void test_reuseport_cbpf(const struct test_params p) +static void test_reuseport_cbpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -284,6 +291,7 @@ static void test_reuseport_cbpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_cbpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_cbpf(fd[0], p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -377,7 +385,7 @@ static void test_filter_no_reuseport(const struct test_params p) static void test_filter_without_bind(void) { - int fd1, fd2; + int fd1, fd2, opt = 1; fprintf(stderr, "Testing filter add without bind...\n"); fd1 = socket(AF_INET, SOCK_DGRAM, 0); @@ -386,6 +394,10 @@ static void test_filter_without_bind(void) fd2 = socket(AF_INET, SOCK_DGRAM, 0); if (fd2 < 0) error(1, errno, "failed to create socket 2"); + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 1"); + if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 2"); attach_ebpf(fd1, 10); attach_cbpf(fd2, 10); @@ -394,6 +406,32 @@ static void test_filter_without_bind(void) close(fd2); } +void enable_fastopen(void) +{ + int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0); + int rw_mask = 3; /* bit 1: client side; bit-2 server side */ + int val, size; + char buf[16]; + + if (fd < 0) + error(1, errno, "Unable to open tcp_fastopen sysctl"); + if (read(fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to read tcp_fastopen sysctl"); + val = atoi(buf); + close(fd); + + if ((val & rw_mask) != rw_mask) { + fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR); + if (fd < 0) + error(1, errno, + "Unable to open 
tcp_fastopen sysctl for writing"); + val |= rw_mask; + size = snprintf(buf, 16, "%d", val); + if (write(fd, buf, size) <= 0) + error(1, errno, "Unable to write tcp_fastopen sysctl"); + close(fd); + } +} int main(void) { @@ -506,6 +544,71 @@ int main(void) .recv_port = 8007, .send_port_min = 9100}); + /* TCP fastopen is required for the TCP tests */ + enable_fastopen(); + fprintf(stderr, "---- IPv4 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8008, + .send_port_min = 9120}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8009, + .send_port_min = 9160}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8010}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8011}); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8012, + .send_port_min = 9200}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8013, + .send_port_min = 9240}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8014}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8015}); + + fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8016, + .send_port_min = 9320}); + 
test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8017, + .send_port_min = 9360}); test_filter_without_bind(); diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c new file mode 100644 index 000000000000..b23d6f54de7b --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c @@ -0,0 +1,258 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. This program creates + * an SO_REUSEPORT receiver group containing one socket per CPU core. It then + * creates a BPF program that will select a socket from this group based + * on the core id that receives the packet. The sending code artificially + * moves itself to run on different core ids and sends one message from + * each core. Since these packets are delivered over loopback, they should + * arrive on the same core that sent them. The receiving code then ensures + * that the packet was received on the socket for the corresponding core id. + * This entire process is done for several different core id permutations + * and for each IPv4/IPv6 and TCP/UDP combination. 
+ */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + struct sock_filter code[] = { + /* A = raw_smp_processor_id() */ + { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU }, + /* return A */ + { BPF_RET | BPF_A, 0, 0, 0 }, + }; + struct sock_fprog p = { + .len = 2, + .filter = code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF"); +} + +static void send_from_cpu(int cpu_id, int 
family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + cpu_set_t cpu_set; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + memset(&cpu_set, 0, sizeof(cpu_set)); + CPU_SET(cpu_id, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) + error(1, errno, "failed to pin to cpu"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, 
"failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i); + if (cpu_id != i) + error(1, 0, "cpu id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, cpu; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (cpu = 0; cpu < len; ++cpu) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[cpu]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (cpu = 0; cpu < len; ++cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Reverse iterate */ + for (cpu = len - 1; cpu >= 0; --cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Even cores */ + for (cpu = 0; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Odd cores */ + for (cpu = 1; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + close(epfd); + for (cpu = 0; cpu < len; ++cpu) + close(rcv_fd[cpu]); +} + +int main(void) +{ + int *rcv_fd, cpus; + + cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus <= 0) + error(1, errno, "failed counting cpus"); + + rcv_fd = calloc(cpus, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + 
test(rcv_fd, cpus, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0c2706bda330..b08f77cbe31b 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) +CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) export CFLAGS @@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \ switch_endian \ syscalls \ tm \ - vphn + vphn \ + math endif diff --git a/tools/testing/selftests/powerpc/basic_asm.h b/tools/testing/selftests/powerpc/basic_asm.h new file mode 100644 index 000000000000..3349a0704d1a --- /dev/null +++ b/tools/testing/selftests/powerpc/basic_asm.h @@ -0,0 +1,70 @@ +#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H +#define _SELFTESTS_POWERPC_BASIC_ASM_H + +#include <ppc-asm.h> +#include <asm/unistd.h> + +#define LOAD_REG_IMMEDIATE(reg,expr) \ + lis reg,(expr)@highest; \ + ori reg,reg,(expr)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(expr)@high; \ + ori reg,reg,(expr)@l; + +/* + * Note: These macros assume that variables being stored on the stack are + * doublewords, while this is usually the case it may not always be the + * case for each use case. + */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#define STACK_FRAME_TOC_POS 24 +#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) +#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) +#else +#define STACK_FRAME_MIN_SIZE 112 +#define STACK_FRAME_TOC_POS 40 +#define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) + +/* + * Caveat: if a function passed more than 8 doublewords, the caller will have + * made more space... 
which would render the 112 incorrect. + */ +#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8)) +#endif + +/* Parameter x saved to the stack */ +#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) + +/* Local variable x saved to the stack after x parameters */ +#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var) +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +/* + * It is very important to note here that _extra is the extra amount of + * stack space needed. This space can be accessed using STACK_FRAME_PARAM() + * or STACK_FRAME_LOCAL() macros. + * + * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp + * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence + * %r1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets + * preprocessed incorrectly, hence r0. + */ +#define PUSH_BASIC_STACK(_extra) \ + mflr r0; \ + std r0,STACK_FRAME_LR_POS(%r1); \ + stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ + mfcr r0; \ + stw r0,STACK_FRAME_CR_POS(%r1); \ + std %r2,STACK_FRAME_TOC_POS(%r1); + +#define POP_BASIC_STACK(_extra) \ + ld %r2,STACK_FRAME_TOC_POS(%r1); \ + lwz r0,STACK_FRAME_CR_POS(%r1); \ + mtcr r0; \ + addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \ + ld r0,STACK_FRAME_LR_POS(%r1); \ + mtlr r0; + +#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore new file mode 100644 index 000000000000..4fe13a439fd7 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -0,0 +1,6 @@ +fpu_syscall +vmx_syscall +fpu_preempt +vmx_preempt +fpu_signal +vmx_signal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile new file mode 100644 index 000000000000..5b88875d5955 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -0,0 +1,19 @@ +TEST_PROGS := fpu_syscall fpu_preempt fpu_signal 
vmx_syscall vmx_preempt vmx_signal + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c +$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec + +fpu_syscall: fpu_asm.S +fpu_preempt: fpu_asm.S +fpu_signal: fpu_asm.S + +vmx_syscall: vmx_asm.S +vmx_preempt: vmx_asm.S +vmx_signal: vmx_asm.S + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S new file mode 100644 index 000000000000..f3711d80e709 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -0,0 +1,198 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +#define PUSH_FPU(pos) \ + stfd f14,pos(sp); \ + stfd f15,pos+8(sp); \ + stfd f16,pos+16(sp); \ + stfd f17,pos+24(sp); \ + stfd f18,pos+32(sp); \ + stfd f19,pos+40(sp); \ + stfd f20,pos+48(sp); \ + stfd f21,pos+56(sp); \ + stfd f22,pos+64(sp); \ + stfd f23,pos+72(sp); \ + stfd f24,pos+80(sp); \ + stfd f25,pos+88(sp); \ + stfd f26,pos+96(sp); \ + stfd f27,pos+104(sp); \ + stfd f28,pos+112(sp); \ + stfd f29,pos+120(sp); \ + stfd f30,pos+128(sp); \ + stfd f31,pos+136(sp); + +#define POP_FPU(pos) \ + lfd f14,pos(sp); \ + lfd f15,pos+8(sp); \ + lfd f16,pos+16(sp); \ + lfd f17,pos+24(sp); \ + lfd f18,pos+32(sp); \ + lfd f19,pos+40(sp); \ + lfd f20,pos+48(sp); \ + lfd f21,pos+56(sp); \ + lfd f22,pos+64(sp); \ + lfd f23,pos+72(sp); \ + lfd f24,pos+80(sp); \ + lfd f25,pos+88(sp); \ + lfd f26,pos+96(sp); \ + lfd f27,pos+104(sp); \ + lfd f28,pos+112(sp); \ + lfd f29,pos+120(sp); \ + lfd f30,pos+128(sp); \ + lfd f31,pos+136(sp); + +# Careful calling this, it will 'clobber' fpu (by design) +# Don't call this from C +FUNC_START(load_fpu) + lfd f14,0(r3) + 
lfd f15,8(r3) + lfd f16,16(r3) + lfd f17,24(r3) + lfd f18,32(r3) + lfd f19,40(r3) + lfd f20,48(r3) + lfd f21,56(r3) + lfd f22,64(r3) + lfd f23,72(r3) + lfd f24,80(r3) + lfd f25,88(r3) + lfd f26,96(r3) + lfd f27,104(r3) + lfd f28,112(r3) + lfd f29,120(r3) + lfd f30,128(r3) + lfd f31,136(r3) + blr +FUNC_END(load_fpu) + +FUNC_START(check_fpu) + mr r4,r3 + li r3,1 # assume a bad result + lfd f0,0(r4) + fcmpu cr1,f0,f14 + bne cr1,1f + lfd f0,8(r4) + fcmpu cr1,f0,f15 + bne cr1,1f + lfd f0,16(r4) + fcmpu cr1,f0,f16 + bne cr1,1f + lfd f0,24(r4) + fcmpu cr1,f0,f17 + bne cr1,1f + lfd f0,32(r4) + fcmpu cr1,f0,f18 + bne cr1,1f + lfd f0,40(r4) + fcmpu cr1,f0,f19 + bne cr1,1f + lfd f0,48(r4) + fcmpu cr1,f0,f20 + bne cr1,1f + lfd f0,56(r4) + fcmpu cr1,f0,f21 + bne cr1,1f + lfd f0,64(r4) + fcmpu cr1,f0,f22 + bne cr1,1f + lfd f0,72(r4) + fcmpu cr1,f0,f23 + bne cr1,1f + lfd f0,80(r4) + fcmpu cr1,f0,f24 + bne cr1,1f + lfd f0,88(r4) + fcmpu cr1,f0,f25 + bne cr1,1f + lfd f0,96(r4) + fcmpu cr1,f0,f26 + bne cr1,1f + lfd f0,104(r4) + fcmpu cr1,f0,f27 + bne cr1,1f + lfd f0,112(r4) + fcmpu cr1,f0,f28 + bne cr1,1f + lfd f0,120(r4) + fcmpu cr1,f0,f29 + bne cr1,1f + lfd f0,128(r4) + fcmpu cr1,f0,f30 + bne cr1,1f + lfd f0,136(r4) + fcmpu cr1,f0,f31 + bne cr1,1f + li r3,0 # Success!!! 
+1: blr + +FUNC_START(test_fpu) + # r3 holds pointer to darray + # r4 holds pointer to where to put the result of fork (the pid) + # f14-f31 are non volatiles + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray + std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid + PUSH_FPU(STACK_FRAME_LOCAL(2,0)) + + bl load_fpu + nop + li r0,__NR_fork + sc + + # pass the result of the fork to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + + POP_FPU(STACK_FRAME_LOCAL(2,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(test_fpu) + +# int preempt_fpu(double *darray, int *threads_starting, int *running) +# On starting will (atomically) decrement threads_starting as a signal that the FPU +# has been loaded with darray. Will proceed to check the validity of the FPU +# registers while running is not zero. +FUNC_START(preempt_fpu) + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # double *darray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + PUSH_FPU(STACK_FRAME_LOCAL(3,0)) + + bl load_fpu + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_FPU(STACK_FRAME_LOCAL(3,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(preempt_fpu) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c new file mode 100644 index 000000000000..0f85b79d883d --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -0,0 +1,113 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers change across preemption. + * Two things should be noted here a) The check_fpu function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + + +__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int threads_starting; +int running; + +extern void preempt_fpu(double *darray, int *threads_starting, int *running); + +void *preempt_fpu_c(void *p) +{ + int i; + srand(pthread_self()); + for (i = 0; i < 21; i++) + darray[i] = rand(); + + /* Test failed if it ever returns */ + preempt_fpu(darray, &threads_starting, &running); + + return p; +} + +int test_preempt_fpu(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc((threads) * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really 
necessary but nice to wait for every thread to start */ + printf("\tWaiting for all workers to start..."); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Workers are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. + */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_fpu + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_fpu, "fpu_preempt"); +} diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c new file mode 100644 index 000000000000..888aa51b4204 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -0,0 +1,135 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers are correctly reported in a + * signal context. Each worker just spins checking its FPU registers, at some + * point a signal will interrupt it and C code will check the signal context + * ensuring it is also the same. 
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Per-thread values handed to preempt_fpu(). signal_fpu_c() overwrites all
+ * 21 entries with rand() before use; the signal handler compares the first
+ * 18 of them against fp_regs[14..31].
+ */
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+		     1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+		     2.1};
+
+/* Set by the signal handler of any worker that sees a mismatching context */
+bool bad_context;
+/* Set to the worker count by test_signal_fpu(), which spins until it is 0 */
+int threads_starting;
+/* Cleared by test_signal_fpu() once the workers should stop */
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+/*
+ * SIGUSR1 handler: compare the FP registers saved in the signal context with
+ * the values this thread stored in darray, flagging bad_context on the first
+ * difference.
+ */
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+	int i;
+	ucontext_t *uc = context;
+	mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non volatiles were loaded up */
+	for (i = 14; i < 32; i++) {
+		if (mc->fp_regs[i] != darray[i - 14]) {
+			bad_context = true;
+			break;
+		}
+	}
+}
+
+/*
+ * Worker thread: install the SIGUSR1 handler, randomise this thread's darray
+ * and spin in preempt_fpu(). The value preempt_fpu() returns becomes the
+ * thread's exit value, which test_signal_fpu() treats as a failure when it is
+ * non-zero.
+ */
+void *signal_fpu_c(void *p)
+{
+	int i;
+	long rc;
+	struct sigaction act;
+
+	/* sa_mask would otherwise be handed to sigaction() uninitialised */
+	sigemptyset(&act.sa_mask);
+	act.sa_sigaction = signal_fpu_sig;
+	act.sa_flags = SA_SIGINFO;
+	rc = sigaction(SIGUSR1, &act, NULL);
+	/*
+	 * NOTE(review): test_signal_fpu() passes NULL as p, so this early
+	 * return looks like success to the joiner - confirm that is intended.
+	 */
+	if (rc)
+		return p;
+
+	srand(pthread_self());
+	for (i = 0; i < 21; i++)
+		darray[i] = rand();
+
+	rc = preempt_fpu(darray, &threads_starting, &running);
+
+	return (void *) rc;
+}
+
+/*
+ * Start THREAD_FACTOR workers per online CPU, wait for threads_starting to
+ * reach zero, send every worker SIGUSR1 ITERATIONS times (one second apart),
+ * then clear running and join the workers. Fails if any worker returned
+ * non-zero or any handler set bad_context.
+ */
+int test_signal_fpu(void)
+{
+	int i, j, rc, threads;
+	void *rc_p;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	printf("\tWaiting for all workers to start...");
+	/* The empty asm is a compiler barrier: re-read threads_starting each pass */
+	while (threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tSending signals to all threads %d times...", ITERATIONS);
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < threads; j++) {
+			pthread_kill(tids[j], SIGUSR1);
+		}
+		sleep(1);
+	}
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why signal_fpu
+		 * returned
+		 */
+		if ((long) rc_p || bad_context)
+			printf("oops\n");
+		if (bad_context)
+			fprintf(stderr, "\t!! bad_context is true\n");
+		FAIL_IF((long) rc_p || bad_context);
+	}
+	printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000000..949e6721256d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> + +#include "utils.h" + +extern int test_fpu(double *darray, pid_t *pid); + +double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int syscall_fpu(void) +{ + pid_t fork_pid; + int i; + int ret; + int child_ret; + for (i = 0; i < 1000; i++) { + /* test_fpu will fork() */ + ret = test_fpu(darray, &fork_pid); + if (fork_pid == -1) + return -1; + if (fork_pid == 0) + exit(ret); + waitpid(fork_pid, &child_ret, 0); + if (ret || child_ret) + return 1; + } + + return 0; +} + +int test_syscall_fpu(void) +{ + /* + * Setup an environment with much context switching + */ + pid_t pid2; + pid_t pid = fork(); + int ret; + int child_ret; + FAIL_IF(pid == -1); + + pid2 = fork(); + /* Can't FAIL_IF(pid2 == -1); because already forked once */ + if (pid2 == -1) { + /* + * Couldn't fork, ensure test is a fail + */ + child_ret = ret = 1; + } else { + ret = syscall_fpu(); + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_syscall_fpu, "syscall_fpu"); + +} diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S new file mode 100644 index 000000000000..1b8c248b3ac1 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_asm.S @@ -0,0 +1,235 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "../basic_asm.h" + +# POS MUST BE 16 ALIGNED! +#define PUSH_VMX(pos,reg) \ + li reg,pos; \ + stvx v20,reg,sp; \ + addi reg,reg,16; \ + stvx v21,reg,sp; \ + addi reg,reg,16; \ + stvx v22,reg,sp; \ + addi reg,reg,16; \ + stvx v23,reg,sp; \ + addi reg,reg,16; \ + stvx v24,reg,sp; \ + addi reg,reg,16; \ + stvx v25,reg,sp; \ + addi reg,reg,16; \ + stvx v26,reg,sp; \ + addi reg,reg,16; \ + stvx v27,reg,sp; \ + addi reg,reg,16; \ + stvx v28,reg,sp; \ + addi reg,reg,16; \ + stvx v29,reg,sp; \ + addi reg,reg,16; \ + stvx v30,reg,sp; \ + addi reg,reg,16; \ + stvx v31,reg,sp; + +# POS MUST BE 16 ALIGNED! +#define POP_VMX(pos,reg) \ + li reg,pos; \ + lvx v20,reg,sp; \ + addi reg,reg,16; \ + lvx v21,reg,sp; \ + addi reg,reg,16; \ + lvx v22,reg,sp; \ + addi reg,reg,16; \ + lvx v23,reg,sp; \ + addi reg,reg,16; \ + lvx v24,reg,sp; \ + addi reg,reg,16; \ + lvx v25,reg,sp; \ + addi reg,reg,16; \ + lvx v26,reg,sp; \ + addi reg,reg,16; \ + lvx v27,reg,sp; \ + addi reg,reg,16; \ + lvx v28,reg,sp; \ + addi reg,reg,16; \ + lvx v29,reg,sp; \ + addi reg,reg,16; \ + lvx v30,reg,sp; \ + addi reg,reg,16; \ + lvx v31,reg,sp; + +# Carefull this will 'clobber' vmx (by design) +# Don't call this from C +FUNC_START(load_vmx) + li r5,0 + lvx v20,r5,r3 + addi r5,r5,16 + lvx v21,r5,r3 + addi r5,r5,16 + lvx v22,r5,r3 + addi r5,r5,16 + lvx v23,r5,r3 + addi r5,r5,16 + lvx v24,r5,r3 + addi r5,r5,16 + lvx v25,r5,r3 + addi r5,r5,16 + lvx v26,r5,r3 + addi r5,r5,16 + lvx v27,r5,r3 + addi r5,r5,16 + lvx v28,r5,r3 + addi r5,r5,16 + lvx v29,r5,r3 + addi r5,r5,16 + lvx v30,r5,r3 + addi r5,r5,16 + lvx v31,r5,r3 + blr +FUNC_END(load_vmx) + +# Should be safe from C, only touches r4, r5 and v0,v1,v2 +FUNC_START(check_vmx) + PUSH_BASIC_STACK(32) + mr r4,r3 + li r3,1 # assume a bad result + li r5,0 + lvx v0,r5,r4 + vcmpequd. v1,v0,v20 + vmr v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v21 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. 
v1,v0,v22 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v23 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v24 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v25 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v26 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v27 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v28 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v29 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v30 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v31 + vand v2,v2,v1 + + li r5,STACK_FRAME_LOCAL(0,0) + stvx v2,r5,sp + ldx r0,r5,sp + cmpdi r0,0xffffffffffffffff + bne 1f + li r3,0 +1: POP_BASIC_STACK(32) + blr +FUNC_END(check_vmx) + +# Safe from C +FUNC_START(test_vmx) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # v20-v31 are non-volatile + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray + std r4,STACK_FRAME_PARAM(1)(sp) # address of pid + PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4) + + bl load_vmx + nop + + li r0,__NR_fork + sc + # Pass the result of fork back to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + + POP_VMX(STACK_FRAME_LOCAL(2,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(test_vmx) + +# int preempt_vmx(vector int *varray, int *threads_starting, int *running) +# On starting will (atomically) decrement threads_starting as a signal that +# the VMX have been loaded with varray. Will proceed to check the validity of +# the VMX registers while running is not zero. 
+FUNC_START(preempt_vmx) + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + # VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0) + PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4) + + bl load_vmx + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(preempt_vmx) diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c new file mode 100644 index 000000000000..9ef376c55b13 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers change across preemption. + * Two things should be noted here a) The check_vmx function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. 
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Per-thread vectors handed to preempt_vmx(). preempt_vmx_c() overwrites all
+ * 12 x 4 elements with rand() before use.
+ */
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+/* Set to the worker count by test_preempt_vmx(), which spins until it is 0 */
+int threads_starting;
+/* Cleared by test_preempt_vmx() once the workers should stop */
+int running;
+
+/*
+ * Returns what check_vmx left in r3: non-zero if the VMX registers stopped
+ * matching varray, zero if the loop ended because running was cleared.
+ */
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+/*
+ * Worker thread: randomise this thread's varray and spin in preempt_vmx().
+ * The result is handed back through the thread's exit value so that
+ * test_preempt_vmx() can fail when a mismatch was seen.
+ */
+void *preempt_vmx_c(void *p)
+{
+	int i, j;
+	long rc;
+	srand(pthread_self());
+	for (i = 0; i < 12; i++)
+		for (j = 0; j < 4; j++)
+			varray[i][j] = rand();
+
+	/* A non-zero result means the registers changed under us */
+	rc = preempt_vmx(varray, &threads_starting, &running);
+
+	return (void *) rc;
+}
+
+/*
+ * Start THREAD_FACTOR workers per online CPU, wait for threads_starting to
+ * reach zero, sleep PREEMPT_TIME seconds, then clear running and join the
+ * workers. Fails if any worker returned non-zero.
+ */
+int test_preempt_vmx(void)
+{
+	int i, rc, threads;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	/* Not really necessary but nice to wait for every thread to start */
+	printf("\tWaiting for all workers to start...");
+	/* The empty asm is a compiler barrier: re-read threads_starting each pass */
+	while(threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+	sleep(PREEMPT_TIME);
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	/*
+	 * Workers are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'.
+	 * r5 will have loaded the value of running.
+	 */
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		void *rc_p;
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why preempt_vmx
+		 * returned
+		 */
+		if ((long) rc_p)
+			printf("oops\n");
+		FAIL_IF((long) rc_p);
+	}
+	printf("done\n");
+
+	/* Matches the other preempt/signal tests, which release tids here */
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000000..671d7533a557
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Per-thread vectors handed to preempt_vmx(). signal_vmx_c() overwrites all
+ * 12 x 4 elements with rand() before use; the signal handler compares them
+ * against vrregs[20..31].
+ */
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+/* Set by the signal handler of any worker that sees a mismatching context */
+bool bad_context;
+/* Cleared by test_signal_vmx() once the workers should stop */
+int running;
+/* Set to the worker count by test_signal_vmx(), which polls until it is 0 */
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+/*
+ * SIGUSR1 handler: compare the VMX registers saved in the signal context with
+ * the vectors this thread stored in varray. On the first difference, dump
+ * registers 20-31 next to the expected values and flag bad_context.
+ */
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+	int i;
+	ucontext_t *uc = context;
+	mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non volatiles were loaded up */
+	for (i = 20; i < 32; i++) {
+		if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+			int j;
+			/*
+			 * Shouldn't printf() in a signal handler, however, this is a
+			 * test and we've detected failure. Understanding what failed
+			 * is paramount. All that happens after this is tests exit with
+			 * failure.
+			 */
+			printf("VMX mismatch at reg %d!\n", i);
+			printf("Reg | Actual | Expected\n");
+			for (j = 20; j < 32; j++) {
+				printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+					mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+					varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+			}
+			bad_context = true;
+			break;
+		}
+	}
+}
+
+/*
+ * Worker thread: install the SIGUSR1 handler, randomise this thread's varray
+ * and spin in preempt_vmx(). The value preempt_vmx() returns becomes the
+ * thread's exit value, which test_signal_vmx() treats as a failure when it is
+ * non-zero.
+ */
+void *signal_vmx_c(void *p)
+{
+	int i, j;
+	long rc;
+	struct sigaction act;
+
+	/* sa_mask would otherwise be handed to sigaction() uninitialised */
+	sigemptyset(&act.sa_mask);
+	act.sa_sigaction = signal_vmx_sig;
+	act.sa_flags = SA_SIGINFO;
+	rc = sigaction(SIGUSR1, &act, NULL);
+	/*
+	 * NOTE(review): test_signal_vmx() passes NULL as p, so this early
+	 * return looks like success to the joiner - confirm that is intended.
+	 */
+	if (rc)
+		return p;
+
+	srand(pthread_self());
+	for (i = 0; i < 12; i++)
+		for (j = 0; j < 4; j++)
+			varray[i][j] = rand();
+
+	rc = preempt_vmx(varray, &threads_starting, &running);
+
+	return (void *) rc;
+}
+
+/*
+ * Start THREAD_FACTOR workers per online CPU, poll threads_starting until it
+ * reaches zero, send every worker SIGUSR1 ITERATIONS times (one second
+ * apart), then clear running and join the workers. Fails if any worker
+ * returned non-zero or any handler set bad_context.
+ */
+int test_signal_vmx(void)
+{
+	int i, j, rc, threads;
+	void *rc_p;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+	while (threads_starting) {
+		/* Compiler barrier: re-read threads_starting each pass */
+		asm volatile("": : :"memory");
+		usleep(1000);
+		printf(", %d", threads_starting);
+	}
+	printf(" ...done\n");
+
+	printf("\tSending signals to all threads %d times...", ITERATIONS);
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < threads; j++) {
+			pthread_kill(tids[j], SIGUSR1);
+		}
+		sleep(1);
+	}
+	printf("done\n");
+
+	printf("\tKilling workers...");
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why signal_vmx
+		 * returned
+		 */
+		if ((long) rc_p || bad_context)
+			printf("oops\n");
+		if (bad_context)
+			fprintf(stderr, "\t!! bad_context is true\n");
+		FAIL_IF((long) rc_p || bad_context);
+	}
+	printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000000..a017918ee1ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+	pid_t fork_pid;
+	int i;
+	int ret;
+	int child_ret;
+	for (i = 0; i < 1000; i++) {
+		/* test_vmx will fork() */
+		ret = test_vmx(varray, &fork_pid);
+		if (fork_pid == -1)
+			return -1;
+		if (fork_pid == 0)
+			exit(ret);
+		waitpid(fork_pid, &child_ret, 0);
+		if (ret || child_ret)
+			return 1;
+	}
+
+	return 0;
+}
+
+int test_vmx_syscall(void)
+{
+	/*
+	 * Setup an environment with much context switching
+	 */
+	pid_t pid2;
+	pid_t pid = fork();
+	int ret;
+	int child_ret;
+	FAIL_IF(pid == -1);
+
+	pid2 = fork();
+	ret = vmx_syscall();
+	/* Can't FAIL_IF(pid2 == -1); because we've already forked */
+	if (pid2 == -1) {
+		/*
+		 *
Couldn't fork, ensure child_ret is set and is a fail + */ + ret = child_ret = 1; + } else { + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_vmx_syscall, "vmx_syscall"); + +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c index d86653f282b1..8c54d18b3e9a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c @@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc) #ifdef __powerpc64__ ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32); #else - ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL); + ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL); #endif /* Should segv on return becuase of invalid context */ segv_expected = 1; diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config new file mode 100644 index 000000000000..6a8e5a9bfc10 --- /dev/null +++ b/tools/testing/selftests/pstore/config @@ -0,0 +1,4 @@ +CONFIG_MISC_FILESYSTEMS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_PMSG=y +CONFIG_PSTORE_CONSOLE=y diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 844787a0d7be..5eb49b7f864c 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,7 +33,7 @@ if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? 
fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags if test -s $1.diags then print_warning Assertion failure in $file $title @@ -64,10 +64,12 @@ then then summary="$summary lockdep: $n_badness" fi - n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1` + n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` if test "$n_stalls" -ne 0 then summary="$summary Stalls: $n_stalls" fi print_warning Summary: $summary +else + rm $1.diags fi diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config new file mode 100644 index 000000000000..db1e11b08c8a --- /dev/null +++ b/tools/testing/selftests/seccomp/config @@ -0,0 +1,2 @@ +CONFIG_SECCOMP=y +CONFIG_SECCOMP_FILTER=y diff --git a/tools/testing/selftests/static_keys/config b/tools/testing/selftests/static_keys/config new file mode 100644 index 000000000000..d538fb774b96 --- /dev/null +++ b/tools/testing/selftests/static_keys/config @@ -0,0 +1 @@ +CONFIG_TEST_STATIC_KEYS=m diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 72cacf5383dd..2b361b830395 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -153,7 +153,7 @@ int main(void) alarmcount = 0; if (timer_create(alarm_clock_id, &se, &tm1) == -1) { - printf("timer_create failled, %s unspported?\n", + printf("timer_create 
failed, %s unsupported?\n", clockstring(alarm_clock_id)); break; } diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config new file mode 100644 index 000000000000..784ed8416324 --- /dev/null +++ b/tools/testing/selftests/user/config @@ -0,0 +1 @@ +CONFIG_TEST_USER_COPY=m diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config new file mode 100644 index 000000000000..698c7ed28a26 --- /dev/null +++ b/tools/testing/selftests/vm/config @@ -0,0 +1 @@ +CONFIG_USERFAULTFD=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d0c473f65850..b47ebd170690 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,15 +4,16 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall -TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ + check_initial_reg_state sigreturn ldt_gdt iopl +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ - ldt_gdt \ vdso_restorer TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) -BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) +BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) CFLAGS := -O2 -g -std=gnu99 -pthread -Wall @@ -40,7 +41,7 @@ clean: $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm -$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c +$(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl # x86_64 users should be encouraged to install 32-bit libraries @@ -65,3 +66,9 @@ endif sysret_ss_attrs_64: 
thunks.S ptrace_syscall_32: raw_syscall_helper_32.S test_syscall_vdso_32: thunks_32.S + +# check_initial_reg_state is special: it needs a custom entry, and it +# needs to be static so that its interpreter doesn't destroy its initial +# state. +check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static +check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c new file mode 100644 index 000000000000..6aaed9b85baf --- /dev/null +++ b/tools/testing/selftests/x86/check_initial_reg_state.c @@ -0,0 +1,109 @@ +/* + * check_initial_reg_state.c - check that execve sets the correct state + * Copyright (c) 2014-2016 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#define _GNU_SOURCE + +#include <stdio.h> + +unsigned long ax, bx, cx, dx, si, di, bp, sp, flags; +unsigned long r8, r9, r10, r11, r12, r13, r14, r15; + +asm ( + ".pushsection .text\n\t" + ".type real_start, @function\n\t" + ".global real_start\n\t" + "real_start:\n\t" +#ifdef __x86_64__ + "mov %rax, ax\n\t" + "mov %rbx, bx\n\t" + "mov %rcx, cx\n\t" + "mov %rdx, dx\n\t" + "mov %rsi, si\n\t" + "mov %rdi, di\n\t" + "mov %rbp, bp\n\t" + "mov %rsp, sp\n\t" + "mov %r8, r8\n\t" + "mov %r9, r9\n\t" + "mov %r10, r10\n\t" + "mov %r11, r11\n\t" + "mov %r12, r12\n\t" + "mov %r13, r13\n\t" + "mov %r14, r14\n\t" + "mov %r15, r15\n\t" + "pushfq\n\t" + "popq flags\n\t" +#else + "mov %eax, ax\n\t" + "mov %ebx, bx\n\t" + "mov %ecx, cx\n\t" + "mov %edx, dx\n\t" + "mov %esi, si\n\t" + "mov %edi, di\n\t" + "mov %ebp, bp\n\t" + "mov %esp, sp\n\t" + "pushfl\n\t" + "popl flags\n\t" +#endif + "jmp _start\n\t" + ".size real_start, . - real_start\n\t" + ".popsection"); + +int main() +{ + int nerrs = 0; + + if (sp == 0) { + printf("[FAIL]\tTest was built incorrectly\n"); + return 1; + } + + if (ax || bx || cx || dx || si || di || bp +#ifdef __x86_64__ + || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15 +#endif + ) { + printf("[FAIL]\tAll GPRs except SP should be 0\n"); +#define SHOW(x) printf("\t" #x " = 0x%lx\n", x); + SHOW(ax); + SHOW(bx); + SHOW(cx); + SHOW(dx); + SHOW(si); + SHOW(di); + SHOW(bp); + SHOW(sp); +#ifdef __x86_64__ + SHOW(r8); + SHOW(r9); + SHOW(r10); + SHOW(r11); + SHOW(r12); + SHOW(r13); + SHOW(r14); + SHOW(r15); +#endif + nerrs++; + } else { + printf("[OK]\tAll GPRs except SP are 0\n"); + } + + if (flags != 0x202) { + printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags); + nerrs++; + } else { + printf("[OK]\tFLAGS is 0x202\n"); + } + + return nerrs ? 
1 : 0; +} diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c new file mode 100644 index 000000000000..c496ca97bc18 --- /dev/null +++ b/tools/testing/selftests/x86/iopl.c @@ -0,0 +1,135 @@ +/* + * iopl.c - Test case for a Linux on Xen 64-bit bug + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <sched.h> +#include <sys/io.h> + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + /* Probe for iopl support. Note that iopl(0) works even as nonroot. */ + if (iopl(3) != 0) { + printf("[OK]\tiopl(3) failed (%d) -- try running as root\n", + errno); + return 0; + } + + /* Restore our original state prior to starting the test. 
*/ + if (iopl(0) != 0) + err(1, "iopl(0)"); + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("\tchild: set IOPL to 3\n"); + if (iopl(3) != 0) + err(1, "iopl"); + + printf("[RUN]\tchild: write to 0x80\n"); + asm volatile ("outb %%al, $0x80" : : "a" (0)); + + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tparent: write to 0x80 (should fail)\n"); + + sethandler(SIGSEGV, sigsegv, 0); + if (sigsetjmp(jmpbuf, 1) != 0) { + printf("[OK]\twrite was denied\n"); + } else { + asm volatile ("outb %%al, $0x80" : : "a" (0)); + printf("[FAIL]\twrite was allowed\n"); + nerrs++; + } + + /* Test the capability checks. */ + printf("\tiopl(3)\n"); + if (iopl(3) != 0) + err(1, "iopl(3)"); + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + goto done; + } + + printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n"); + if (iopl(3) != 0) { + printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(0) unprivileged\n"); + if (iopl(0) != 0) { + printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(3) unprivileged\n"); + if (iopl(3) == 0) { + printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n"); + nerrs++; + } else { + printf("[OK]\tFailed as expected\n"); + } + +done: + return nerrs ? 
1 : 0; +} + diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 5105b49cd8aa..421456784bc6 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -103,6 +103,17 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), err(1, "sigaction"); } +static void setsigign(int sig, int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = (void *)SIG_IGN; + sa.sa_flags = flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + static void clearhandler(int sig) { struct sigaction sa; @@ -187,7 +198,7 @@ static void test_ptrace_syscall_restart(void) printf("[RUN]\tSYSEMU\n"); if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) - err(1, "PTRACE_SYSCALL"); + err(1, "PTRACE_SYSEMU"); wait_trap(chld); if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) @@ -218,7 +229,7 @@ static void test_ptrace_syscall_restart(void) err(1, "PTRACE_SETREGS"); if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) - err(1, "PTRACE_SYSCALL"); + err(1, "PTRACE_SYSEMU"); wait_trap(chld); if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) @@ -250,7 +261,7 @@ static void test_ptrace_syscall_restart(void) err(1, "PTRACE_SETREGS"); if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) - err(1, "PTRACE_SYSCALL"); + err(1, "PTRACE_SYSEMU"); wait_trap(chld); if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) @@ -277,6 +288,119 @@ static void test_ptrace_syscall_restart(void) } } +static void test_restart_under_ptrace(void) +{ + printf("[RUN]\tkernel syscall restart under ptrace\n"); + pid_t chld = fork(); + if (chld < 0) + err(1, "fork"); + + if (chld == 0) { + if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) + err(1, "PTRACE_TRACEME"); + + printf("\tChild will take a nap until signaled\n"); + setsigign(SIGUSR1, SA_RESTART); + raise(SIGSTOP); + + syscall(SYS_pause, 0, 0, 0, 0, 0, 0); + _exit(0); + } + + int status; + + /* Wait for SIGSTOP.
*/ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + struct user_regs_struct regs; + + printf("[RUN]\tSYSCALL\n"); + if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + /* We should be stopped at pause(2) entry. */ + + if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_pause || + regs.user_arg0 != 0 || regs.user_arg1 != 0 || + regs.user_arg2 != 0 || regs.user_arg3 != 0 || + regs.user_arg4 != 0 || regs.user_arg5 != 0) { + printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tInitial nr and args are correct\n"); + } + + /* Interrupt it. */ + kill(chld, SIGUSR1); + + /* Advance. We should be stopped at exit. */ + printf("[RUN]\tSYSCALL\n"); + if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_pause || + regs.user_arg0 != 0 || regs.user_arg1 != 0 || + regs.user_arg2 != 0 || regs.user_arg3 != 0 || + regs.user_arg4 != 0 || regs.user_arg5 != 0) { + printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n", + (long)regs.user_ax); + } + + /* Poke the regs back in. This must not break anything.
*/ + if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0) + err(1, "PTRACE_SETREGS"); + + /* Catch the (ignored) SIGUSR1. */ + if (ptrace(PTRACE_CONT, chld, 0, 0) != 0) + err(1, "PTRACE_CONT"); + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); + if (!WIFSTOPPED(status)) { + printf("[FAIL]\tChild was not stopped for SIGUSR1 (status = 0x%x)\n", status); + nerrs++; + } else { + printf("[OK]\tChild got SIGUSR1\n"); + } + + /* The next event should be pause(2) again. */ + printf("[RUN]\tStep again\n"); + if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + /* We should be stopped at pause(2) entry. */ + + if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_pause || + regs.user_arg0 != 0 || regs.user_arg1 != 0 || + regs.user_arg2 != 0 || regs.user_arg3 != 0 || + regs.user_arg4 != 0 || regs.user_arg5 != 0) { + printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tpause(2) restarted correctly\n"); + } + + /* Kill it. */ + kill(chld, SIGKILL); + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); +} + int main() { printf("[RUN]\tCheck int80 return regs\n"); @@ -290,5 +414,7 @@ int main() test_ptrace_syscall_restart(); + test_restart_under_ptrace(); + return 0; } diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index b5aa1bab7416..8a577e7070c6 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -54,6 +54,37 @@ #include <sys/ptrace.h> #include <sys/user.h> +/* Pull in AR_xyz defines.
*/ +typedef unsigned int u32; +typedef unsigned short u16; +#include "../../../../arch/x86/include/asm/desc_defs.h" + +/* + * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc + * headers. + */ +#ifdef __x86_64__ +/* + * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on + * kernels that save SS in the sigcontext. All kernels that set + * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp + * regardless of SS (i.e. they implement espfix). + * + * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS + * when delivering a signal that came from 64-bit code. + * + * Sigreturn restores SS as follows: + * + * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || + * saved CS is not 64-bit) + * new SS = saved SS (will fail IRET and signal if invalid) + * else + * new SS = a flat 32-bit data segment + */ +#define UC_SIGCONTEXT_SS 0x2 +#define UC_STRICT_RESTORE_SS 0x4 +#endif + /* * In principle, this test can run on Linux emulation layers (e.g. * Illumos "LX branded zones"). Solaris-based kernels reserve LDT @@ -267,6 +298,9 @@ static gregset_t initial_regs, requested_regs, resulting_regs; /* Instructions for the SIGUSR1 handler. */ static volatile unsigned short sig_cs, sig_ss; static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; +#ifdef __x86_64__ +static volatile sig_atomic_t sig_corrupt_final_ss; +#endif /* Abstractions for some 32-bit vs 64-bit differences. */ #ifdef __x86_64__ @@ -305,9 +339,105 @@ static greg_t *csptr(ucontext_t *ctx) } #endif +/* + * Checks a given selector for its code bitness or returns -1 if it's not + * a usable code segment selector. 
+ */ +int cs_bitness(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return -1; + + bool db = (ar & (1 << 22)); + bool l = (ar & (1 << 21)); + + if (!(ar & (1<<11))) + return -1; /* Not code. */ + + if (l && !db) + return 64; + else if (!l && db) + return 32; + else if (!l && !db) + return 16; + else + return -1; /* Unknown bitness. */ +} + +/* + * Returns true if the given selector refers to a read/write data segment + * (normal or expand-down) that has AR_P set, i.e. what this test accepts as SS. + */ +bool is_valid_ss(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return false; + + if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && + (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) + return false; + + return (ar & AR_P); +} + /* Number of errors in the current test case. */ static volatile sig_atomic_t nerrs; +static void validate_signal_ss(int sig, ucontext_t *ctx) +{ +#ifdef __x86_64__ + bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); + + if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { + printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); + nerrs++; + + /* + * This happens on Linux 4.1. The rest will fail, too, so + * return now to reduce the noise. + */ + return; + } + + /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ + if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { + printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", + sig); + nerrs++; + } + + if (is_valid_ss(*ssptr(ctx))) { + /* + * DOSEMU was written before 64-bit sigcontext had SS, and + * it tries to figure out the signal source SS by looking at + * the physical register. Make sure that keeps working.
+ */ + unsigned short hw_ss; + asm ("mov %%ss, %0" : "=rm" (hw_ss)); + if (hw_ss != *ssptr(ctx)) { + printf("[FAIL]\tHW SS didn't match saved SS\n"); + nerrs++; + } + } +#endif +} + /* * SIGUSR1 handler. Sets CS and SS as requested and points IP to the * int3 trampoline. Sets SP to a large known value so that we can see @@ -317,6 +447,8 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; + validate_signal_ss(sig, ctx); + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); *csptr(ctx) = sig_cs; @@ -334,13 +466,16 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) } /* - * Called after a successful sigreturn. Restores our state so that - * the original raise(SIGUSR1) returns. + * Called after a successful sigreturn (via int3) or from a failed + * sigreturn (directly by kernel). Restores our state so that the + * original raise(SIGUSR1) returns. */ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; + validate_signal_ss(sig, ctx); + sig_err = ctx->uc_mcontext.gregs[REG_ERR]; sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; @@ -358,41 +493,62 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); +#ifdef __x86_64__ + if (sig_corrupt_final_ss) { + if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { + printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); + nerrs++; + } else { + /* + * DOSEMU transitions from 32-bit to 64-bit mode by + * adjusting sigcontext, and it requires that this work + * even if the saved SS is bogus. + */ + printf("\tCorrupting SS on return to 64-bit mode\n"); + *ssptr(ctx) = 0; + } + } +#endif + sig_trapped = sig; } -/* - * Checks a given selector for its code bitness or returns -1 if it's not - * a usable code segment selector. 
- */ -int cs_bitness(unsigned short cs) +#ifdef __x86_64__ +/* Tests recovery if !UC_STRICT_RESTORE_SS */ +static void sigusr2(int sig, siginfo_t *info, void *ctx_void) { - uint32_t valid = 0, ar; - asm ("lar %[cs], %[ar]\n\t" - "jnz 1f\n\t" - "mov $1, %[valid]\n\t" - "1:" - : [ar] "=r" (ar), [valid] "+rm" (valid) - : [cs] "r" (cs)); + ucontext_t *ctx = (ucontext_t*)ctx_void; - if (!valid) - return -1; + if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { + printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); + nerrs++; + return; /* We can't do the rest. */ + } - bool db = (ar & (1 << 22)); - bool l = (ar & (1 << 21)); + ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; + *ssptr(ctx) = 0; - if (!(ar & (1<<11))) - return -1; /* Not code. */ + /* Return. The kernel should recover without sending another signal. */ +} - if (l && !db) - return 64; - else if (!l && db) - return 32; - else if (!l && !db) - return 16; - else - return -1; /* Unknown bitness. */ +static int test_nonstrict_ss(void) +{ + clearhandler(SIGUSR1); + clearhandler(SIGTRAP); + clearhandler(SIGSEGV); + clearhandler(SIGILL); + sethandler(SIGUSR2, sigusr2, 0); + + nerrs = 0; + + printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); + raise(SIGUSR2); + if (!nerrs) + printf("[OK]\tIt worked\n"); + + return nerrs; } +#endif /* Finds a usable code segment of the requested bitness. */ int find_cs(int bitness) @@ -576,6 +732,12 @@ static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) errdesc, strsignal(sig_trapped)); return 0; } else { + /* + * This also implicitly tests UC_STRICT_RESTORE_SS: + * We check that these signals set UC_STRICT_RESTORE_SS and, + * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, + * then we won't get SIGSEGV. + */ printf("[FAIL]\tDid not get SIGSEGV\n"); return 1; } @@ -632,6 +794,14 @@ int main() GDT3(gdt_data16_idx)); } +#ifdef __x86_64__ + /* Nasty ABI case: check SS corruption handling. 
*/ + sig_corrupt_final_ss = 1; + total_nerrs += test_valid_sigreturn(32, false, -1); + total_nerrs += test_valid_sigreturn(32, true, -1); + sig_corrupt_final_ss = 0; +#endif + /* * We're done testing valid sigreturn cases. Now we test states * for which sigreturn itself will succeed but the subsequent @@ -680,5 +850,9 @@ int main() if (gdt_npdata32_idx) test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); +#ifdef __x86_64__ + total_nerrs += test_nonstrict_ss(); +#endif + return total_nerrs ? 1 : 0; } diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c index 60c06af4646a..43fcab367fb0 100644 --- a/tools/testing/selftests/x86/syscall_nt.c +++ b/tools/testing/selftests/x86/syscall_nt.c @@ -17,6 +17,9 @@ #include <stdio.h> #include <unistd.h> +#include <string.h> +#include <signal.h> +#include <err.h> #include <sys/syscall.h> #include <asm/processor-flags.h> @@ -26,6 +29,8 @@ # define WIDTH "l" #endif +static unsigned int nerrs; + static unsigned long get_eflags(void) { unsigned long eflags; @@ -39,16 +44,52 @@ static void set_eflags(unsigned long eflags) : : "rm" (eflags) : "flags"); } -int main() +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) { - printf("[RUN]\tSet NT and issue a syscall\n"); - set_eflags(get_eflags() | X86_EFLAGS_NT); + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ +} + +static void do_it(unsigned long extraflags) +{ + unsigned long flags; + + set_eflags(get_eflags() | extraflags); syscall(SYS_getpid); - if (get_eflags() & X86_EFLAGS_NT) { - printf("[OK]\tThe syscall worked and NT is still set\n"); - return 0; + flags = get_eflags(); + if ((flags & extraflags) == extraflags) { + printf("[OK]\tThe syscall worked and flags are still set\n"); } else { - 
printf("[FAIL]\tThe syscall worked but NT was cleared\n"); - return 1; + printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n", + flags, extraflags); + nerrs++; } } + +int main(void) +{ + printf("[RUN]\tSet NT and issue a syscall\n"); + do_it(X86_EFLAGS_NT); + + /* + * Now try it again with TF set -- TF forces returns via IRET in all + * cases except non-ptregs-using 64-bit full fast path syscalls. + */ + + sethandler(SIGTRAP, sigtrap, 0); + + printf("[RUN]\tSet NT|TF and issue a syscall\n"); + do_it(X86_EFLAGS_NT | X86_EFLAGS_TF); + + return nerrs == 0 ? 0 : 1; +} diff --git a/tools/testing/selftests/zram/config b/tools/testing/selftests/zram/config new file mode 100644 index 000000000000..e0cc47e2c7e2 --- /dev/null +++ b/tools/testing/selftests/zram/config @@ -0,0 +1,2 @@ +CONFIG_ZSMALLOC=y +CONFIG_ZRAM=m diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h new file mode 100644 index 000000000000..4f93af89ae16 --- /dev/null +++ b/tools/virtio/linux/dma-mapping.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_DMA_MAPPING_H +#define _LINUX_DMA_MAPPING_H + +#ifdef CONFIG_HAS_DMA +# error Virtio userspace code does not support CONFIG_HAS_DMA +#endif + +#define PCI_DMA_BUS_IS_PHYS 1 + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +#endif diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 5a6016224bb9..e92903fc7113 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -61,6 +61,8 @@ #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1) #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +#define MAX_SWAPFILES_SHIFT 5 +#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT) #define PM_SOFT_DIRTY (1ULL << 55) #define PM_MMAP_EXCLUSIVE (1ULL << 56) #define PM_FILE (1ULL << 61) @@ -73,6 +75,7 @@ #define KPF_BYTES 8 #define PROC_KPAGEFLAGS "/proc/kpageflags" +#define PROC_KPAGECGROUP
"/proc/kpagecgroup" /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 @@ -92,7 +95,8 @@ #define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 #define KPF_SLUB_DEBUG 51 -#define KPF_FILE 62 +#define KPF_FILE 61 +#define KPF_SWAP 62 #define KPF_MMAP_EXCLUSIVE 63 #define KPF_ALL_BITS ((uint64_t)~0ULL) @@ -146,6 +150,7 @@ static const char * const page_flag_names[] = { [KPF_SLUB_DEBUG] = "E:slub_debug", [KPF_FILE] = "F:file", + [KPF_SWAP] = "w:swap", [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive", }; @@ -164,7 +169,9 @@ static int opt_raw; /* for kernel developers */ static int opt_list; /* list pages (in ranges) */ static int opt_no_summary; /* don't show summary */ static pid_t opt_pid; /* process to walk */ -const char * opt_file; +const char * opt_file; /* file or directory path */ +static uint64_t opt_cgroup; /* cgroup inode */ +static int opt_list_cgroup;/* list page cgroup */ #define MAX_ADDR_RANGES 1024 static int nr_addr_ranges; @@ -185,6 +192,7 @@ static int page_size; static int pagemap_fd; static int kpageflags_fd; +static int kpagecgroup_fd = -1; static int opt_hwpoison; static int opt_unpoison; @@ -278,6 +286,16 @@ static unsigned long kpageflags_read(uint64_t *buf, return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); } +static unsigned long kpagecgroup_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + if (kpagecgroup_fd < 0) + return pages; /* fd not opened: leave buf untouched, report all pages */ + + return do_u64_read(kpagecgroup_fd, PROC_KPAGECGROUP, buf, index, pages); +} + static unsigned long pagemap_read(uint64_t *buf, unsigned long index, unsigned long pages) @@ -297,6 +315,10 @@ static unsigned long pagemap_pfn(uint64_t val) return pfn; } +static unsigned long pagemap_swap_offset(uint64_t val) +{ + return val & PM_SWAP ?
PM_SWAP_OFFSET(val) : 0; +} /* * page flag names @@ -346,14 +368,15 @@ static char *page_flag_longname(uint64_t flags) */ static void show_page_range(unsigned long voffset, unsigned long offset, - unsigned long size, uint64_t flags) + unsigned long size, uint64_t flags, uint64_t cgroup) { static uint64_t flags0; + static uint64_t cgroup0; static unsigned long voff; static unsigned long index; static unsigned long count; - if (flags == flags0 && offset == index + count && + if (flags == flags0 && cgroup == cgroup0 && offset == index + count && size && voffset == voff + count) { count += size; return; @@ -364,11 +387,14 @@ static void show_page_range(unsigned long voffset, unsigned long offset, printf("%lx\t", voff); if (opt_file) printf("%lu\t", voff); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } flags0 = flags; + cgroup0= cgroup; index = offset; voff = voffset; count = size; @@ -376,16 +402,18 @@ static void show_page_range(unsigned long voffset, unsigned long offset, static void flush_page_range(void) { - show_page_range(0, 0, 0, 0); + show_page_range(0, 0, 0, 0, 0); } -static void show_page(unsigned long voffset, - unsigned long offset, uint64_t flags) +static void show_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup) { if (opt_pid) printf("%lx\t", voffset); if (opt_file) printf("%lu\t", voffset); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup); printf("%lx\t%s\n", offset, page_flag_name(flags)); } @@ -452,6 +480,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) flags |= BIT(SOFTDIRTY); if (pme & PM_FILE) flags |= BIT(FILE); + if (pme & PM_SWAP) + flags |= BIT(SWAP); if (pme & PM_MMAP_EXCLUSIVE) flags |= BIT(MMAP_EXCLUSIVE); @@ -566,23 +596,26 @@ static size_t hash_slot(uint64_t flags) exit(EXIT_FAILURE); } -static void add_page(unsigned long voffset, - unsigned long offset, uint64_t 
flags, uint64_t pme) +static void add_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup, uint64_t pme) { flags = kpageflags_flags(flags, pme); if (!bit_mask_ok(flags)) return; + if (opt_cgroup && cgroup != (uint64_t)opt_cgroup) + return; + if (opt_hwpoison) hwpoison_page(offset); if (opt_unpoison) unpoison_page(offset); if (opt_list == 1) - show_page_range(voffset, offset, 1, flags); + show_page_range(voffset, offset, 1, flags, cgroup); else if (opt_list == 2) - show_page(voffset, offset, flags); + show_page(voffset, offset, flags, cgroup); nr_pages[hash_slot(flags)]++; total_pages++; @@ -595,24 +628,57 @@ static void walk_pfn(unsigned long voffset, uint64_t pme) { uint64_t buf[KPAGEFLAGS_BATCH]; + uint64_t cgi[KPAGEFLAGS_BATCH]; unsigned long batch; unsigned long pages; unsigned long i; + /* + * kpagecgroup_read() reads only if kpagecgroup were opened, but + * /proc/kpagecgroup might even not exist, so it's better to fill + * them with zeros here. + */ + if (count == 1) + cgi[0] = 0; + else + memset(cgi, 0, sizeof cgi); + while (count) { batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); pages = kpageflags_read(buf, index, batch); if (pages == 0) break; + if (kpagecgroup_read(cgi, index, pages) != pages) + fatal("kpagecgroup returned fewer pages than expected"); + for (i = 0; i < pages; i++) - add_page(voffset + i, index + i, buf[i], pme); + add_page(voffset + i, index + i, buf[i], cgi[i], pme); index += pages; count -= pages; } } +static void walk_swap(unsigned long voffset, uint64_t pme) +{ + uint64_t flags = kpageflags_flags(0, pme); + + if (!bit_mask_ok(flags)) + return; + + if (opt_cgroup) + return; + + if (opt_list == 1) + show_page_range(voffset, pagemap_swap_offset(pme), 1, flags, 0); + else if (opt_list == 2) + show_page(voffset, pagemap_swap_offset(pme), flags, 0); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + #define PAGEMAP_BATCH (64 << 10) static void walk_vma(unsigned long index, unsigned long count) { 
@@ -632,6 +698,8 @@ static void walk_vma(unsigned long index, unsigned long count) pfn = pagemap_pfn(buf[i]); if (pfn) walk_pfn(index + i, pfn, 1, buf[i]); + if (buf[i] & PM_SWAP) + walk_swap(index + i, buf[i]); } index += pages; @@ -713,10 +781,12 @@ static void usage(void) " -d|--describe flags Describe flags\n" " -a|--addr addr-spec Walk a range of pages\n" " -b|--bits bits-spec Walk pages with specified bits\n" +" -c|--cgroup path|@inode Walk pages within memory cgroup\n" " -p|--pid pid Walk process address space\n" " -f|--file filename Walk file address space\n" " -l|--list Show page details in ranges\n" " -L|--list-each Show page details one by one\n" +" -C|--list-cgroup Show cgroup inode for pages\n" " -N|--no-summary Don't show summary info\n" " -X|--hwpoison hwpoison pages\n" " -x|--unpoison unpoison pages\n" @@ -851,6 +921,7 @@ static void walk_file(const char *name, const struct stat *st) { uint8_t vec[PAGEMAP_BATCH]; uint64_t buf[PAGEMAP_BATCH], flags; + uint64_t cgroup = 0; unsigned long nr_pages, pfn, i; off_t off, end = st->st_size; int fd; @@ -908,12 +979,15 @@ got_sigbus: continue; if (!kpageflags_read(&flags, pfn, 1)) continue; + if (!kpagecgroup_read(&cgroup, pfn, 1)) + fatal("kpagecgroup_read failed"); if (first && opt_list) { first = 0; flush_page_range(); show_file(name, st); } - add_page(off / page_size + i, pfn, flags, buf[i]); + add_page(off / page_size + i, pfn, + flags, cgroup, buf[i]); } } @@ -965,6 +1039,24 @@ static void parse_file(const char *name) opt_file = name; } +static void parse_cgroup(const char *path) +{ + if (path[0] == '@') { + opt_cgroup = parse_number(path + 1); + return; + } + + struct stat st; + + if (stat(path, &st)) + fatal("stat failed: %s: %m\n", path); + + if (!S_ISDIR(st.st_mode)) + fatal("cgroup supposed to be a directory: %s\n", path); + + opt_cgroup = st.st_ino; +} + static void parse_addr_range(const char *optarg) { unsigned long offset; @@ -1088,9 +1180,11 @@ static const struct option opts[] = { { "file" , 
1, NULL, 'f' }, { "addr" , 1, NULL, 'a' }, { "bits" , 1, NULL, 'b' }, + { "cgroup" , 1, NULL, 'c' }, { "describe" , 1, NULL, 'd' }, { "list" , 0, NULL, 'l' }, { "list-each" , 0, NULL, 'L' }, + { "list-cgroup", 0, NULL, 'C' }, { "no-summary", 0, NULL, 'N' }, { "hwpoison" , 0, NULL, 'X' }, { "unpoison" , 0, NULL, 'x' }, @@ -1105,7 +1199,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); while ((c = getopt_long(argc, argv, - "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) { + "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) { switch (c) { case 'r': opt_raw = 1; @@ -1122,6 +1216,12 @@ int main(int argc, char *argv[]) case 'b': parse_bits_mask(optarg); break; + case 'c': + parse_cgroup(optarg); + break; + case 'C': + opt_list_cgroup = 1; + break; case 'd': describe_flags(optarg); exit(0); @@ -1151,10 +1251,15 @@ int main(int argc, char *argv[]) } } + if (opt_cgroup || opt_list_cgroup) + kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); + if (opt_list && opt_pid) printf("voffset\t"); if (opt_list && opt_file) printf("foffset\t"); + if (opt_list && opt_list_cgroup) + printf("cgroup\t"); if (opt_list == 1) printf("offset\tlen\tflags\n"); if (opt_list == 2) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 86e698d07e20..1889163f2f05 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -135,7 +135,7 @@ static void usage(void) "\nValid debug options (FZPUT may be combined)\n" "a / A Switch on all debug options (=FZUP)\n" "- Switch off all debug options\n" - "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" + "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" "z / Z Redzoning\n" "p / P Poisoning\n" "u / U Tracking\n" |