diff options
Diffstat (limited to 'tools/perf')
32 files changed, 327 insertions, 138 deletions
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 83e2887f91a3..fbbfdc39271d 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -6,12 +6,15 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c tools/include/asm/bug.h +tools/include/asm-generic/bitops/arch_hweight.h tools/include/asm-generic/bitops/atomic.h +tools/include/asm-generic/bitops/const_hweight.h tools/include/asm-generic/bitops/__ffs.h tools/include/asm-generic/bitops/__fls.h tools/include/asm-generic/bitops/find.h tools/include/asm-generic/bitops/fls64.h tools/include/asm-generic/bitops/fls.h +tools/include/asm-generic/bitops/hweight.h tools/include/asm-generic/bitops.h tools/include/linux/bitops.h tools/include/linux/compiler.h @@ -19,6 +22,8 @@ tools/include/linux/export.h tools/include/linux/hash.h tools/include/linux/log2.h tools/include/linux/types.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h @@ -29,6 +34,7 @@ include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/find_next_bit.c +lib/hweight.c lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67a03a825b3c..aa6a50447c32 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -232,12 +232,15 @@ LIB_H += ../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += ../include/linux/bitops.h +LIB_H += ../include/asm-generic/bitops/arch_hweight.h LIB_H += ../include/asm-generic/bitops/atomic.h +LIB_H += ../include/asm-generic/bitops/const_hweight.h LIB_H += ../include/asm-generic/bitops/find.h LIB_H += ../include/asm-generic/bitops/fls64.h LIB_H += ../include/asm-generic/bitops/fls.h LIB_H += ../include/asm-generic/bitops/__ffs.h LIB_H += ../include/asm-generic/bitops/__fls.h +LIB_H += ../include/asm-generic/bitops/hweight.h LIB_H += ../include/asm-generic/bitops.h LIB_H += ../include/linux/compiler.h LIB_H += ../include/linux/log2.h @@ -255,7 +258,6 @@ LIB_H += util/include/linux/linkage.h LIB_H += util/include/asm/asm-offsets.h LIB_H += ../include/asm/bug.h LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/hweight.h LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h LIB_H += util/include/asm/uaccess.h @@ -462,10 +464,12 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(RAW_ARCH),x86_64) +ifeq ($(ARCH), x86) +ifeq ($(IS_64_BIT), 1) BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif +endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o @@ -743,6 +747,9 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3bb50eac5542..0c370f81e002 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -103,7 +103,7 @@ static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -128,7 +128,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) * yet used) * -1 in case of errors */ -static int check_return_addr(struct dso *dso, Dwarf_Addr pc) +static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc) { int rc = -1; Dwfl *dwfl; @@ -155,6 +155,7 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) Dwarf_Addr start = pc; Dwarf_Addr end = pc; bool signalp; + const char *exec_file = dso->long_name; dwfl = dso->dwfl; @@ -165,8 +166,10 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) return -1; } - if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", + mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1, + map_start, false); + if (!mod) { + pr_debug("dwfl_report_elf() failed %s\n", dwarf_errmsg(-1)); /* * We normally cache the DWARF debug info and never @@ -256,10 +259,10 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) return skip_slot; } - rc = check_return_addr(dso, ip); + rc = check_return_addr(dso, al.map->start, ip); - pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", - dso->long_name, chain->nr, ip, rc); + pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n", + dso->long_name, al.sym->name, ip, rc); if (rc == 0) { /* diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 07a8d7646a15..005cc283790c 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -19,12 +19,12 @@ #include <stdlib.h> #include <signal.h> #include <sys/wait.h> -#include <linux/unistd.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/syscall.h> #include <pthread.h> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e7417fe97a97..747f86103599 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1ce425d101a9..1fd96c13f199 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -545,6 +545,42 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, return __hist_entry__cmp_compute(p_left, p_right, c); } +static int64_t +hist_entry__cmp_nop(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int64_t +hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) +{ + if (sort_compute) + return 0; + + if (left->stat.period == right->stat.period) + return 0; + return left->stat.period > right->stat.period ? 1 : -1; +} + +static int64_t +hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); +} + +static int64_t +hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); +} + +static int64_t +hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); +} + static void insert_hist_entry_by_compute(struct rb_root *root, struct hist_entry *he, int c) @@ -605,7 +641,7 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__compute_resort(hists); } else { - hists__output_resort(hists); + hists__output_resort(hists, NULL); } hists__fprintf(hists, true, 0, 0, 0, stdout); @@ -1038,27 +1074,35 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->header = hpp__header; fmt->width = hpp__width; fmt->entry = hpp__entry_global; + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; /* TODO more colors */ switch (idx) { case PERF_HPP_DIFF__BASELINE: fmt->color = hpp__color_baseline; + fmt->sort = hist_entry__cmp_baseline; break; case PERF_HPP_DIFF__DELTA: fmt->color = hpp__color_delta; + fmt->sort = hist_entry__cmp_delta; break; case PERF_HPP_DIFF__RATIO: fmt->color = hpp__color_ratio; + fmt->sort = hist_entry__cmp_ratio; break; case PERF_HPP_DIFF__WEIGHTED_DIFF: fmt->color = hpp__color_wdiff; + fmt->sort = hist_entry__cmp_wdiff; break; default: + fmt->sort = hist_entry__cmp_nop; break; } init_header(d, dfmt); perf_hpp__column_register(fmt); + perf_hpp__register_sort_field(fmt); } static void ui_init(void) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 011195e38f21..198f3c3aff95 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,7 +19,9 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; - const struct option list_options[] = { + bool raw_dump = false; + struct option list_options[] = { + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_END() }; const char * const list_usage[] = { @@ -27,11 +29,18 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); setup_pager(); + if (raw_dump) { + print_events(NULL, true); + return 0; + } + if (argc == 0) { print_events(NULL, false); return 0; @@ -53,8 +62,6 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, false); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, false); - else if (strcmp(argv[i], "--raw-dump") == 0) - print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 39367609c707..072ae8ad67fc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -457,6 +457,19 @@ static void report__collapse_hists(struct report *rep) ui_progress__finish(); } +static void report__output_resort(struct report *rep) +{ + struct ui_progress prog; + struct perf_evsel *pos; + + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); + + evlist__for_each(rep->session->evlist, pos) + hists__output_resort(evsel__hists(pos), &prog); + + ui_progress__finish(); +} + static int __cmd_report(struct report *rep) { int ret; @@ -505,13 +518,20 @@ static int __cmd_report(struct report *rep) if (session_done()) return 0; + /* + * recalculate number of entries after collapsing since it + * might be changed during the collapse phase. + */ + rep->nr_entries = 0; + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (rep->nr_entries == 0) { ui__error("The %s file has no samples!\n", file->path); return 0; } - evlist__for_each(session->evlist, pos) - hists__output_resort(evsel__hists(pos)); + report__output_resort(rep); return report__browse_hists(rep); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0aa7747ff139..616f0fcb4701 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -66,7 +66,6 @@ #include <sys/utsname.h> #include <sys/mman.h> -#include <linux/unistd.h> #include <linux/types.h> static volatile int done; @@ -285,7 +284,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -554,7 +553,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); } static void *display_thread_tui(void *arg) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5d4b039fe1ed..648e31ff4021 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -20,7 +20,7 @@ NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) - ifeq (${IS_X86_64}, 1) + ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index 851cd0172a76..ff95a68741d1 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,7 +1,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ +RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ @@ -9,23 +9,23 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/tile.*/tile/ ) # Additional ARCH settings for x86 -ifeq ($(ARCH),i386) - override ARCH := x86 +ifeq ($(RAW_ARCH),i386) + ARCH ?= x86 endif -ifeq ($(ARCH),x86_64) - override ARCH := x86 - IS_X86_64 := 0 - ifeq (, $(findstring m32,$(CFLAGS))) - IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) - RAW_ARCH := x86_64 +ifeq ($(RAW_ARCH),x86_64) + ARCH ?= x86 + + ifneq (, $(findstring m32,$(CFLAGS))) + RAW_ARCH := x86_32 endif endif -ifeq (${IS_X86_64}, 1) +ARCH ?= $(RAW_ARCH) + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) IS_64_BIT := 1 -else ifeq ($(ARCH),x86) - IS_64_BIT := 0 else - IS_64_BIT := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) + IS_64_BIT := 0 endif diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index a3b13d7dc1d4..6ef68165c9db 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -6,7 +6,6 @@ #include <sys/syscall.h> #include <linux/types.h> #include <linux/perf_event.h> -#include <asm/unistd.h> #if defined(__i386__) #define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index ab28cca2cb97..0bf06bec68c7 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,9 @@ #include "thread.h" #include "callchain.h" +/* For bsearch. We try to unwind functions in shared object. */ +#include <stdlib.h> + static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -28,7 +31,7 @@ static int init_live_machine(struct machine *machine) mmap_handler, machine, true); } -#define MAX_STACK 6 +#define MAX_STACK 8 static int unwind_entry(struct unwind_entry *entry, void *arg) { @@ -37,6 +40,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) static const char *funcs[MAX_STACK] = { "test__arch_unwind_sample", "unwind_thread", + "compare", + "bsearch", "krava_3", "krava_2", "krava_1", @@ -88,10 +93,37 @@ static int unwind_thread(struct thread *thread) return err; } +static int global_unwind_retval = -INT_MAX; + +__attribute__ ((noinline)) +static int compare(void *p1, void *p2) +{ + /* Any possible value should be 'thread' */ + struct thread *thread = *(struct thread **)p1; + + if (global_unwind_retval == -INT_MAX) + global_unwind_retval = unwind_thread(thread); + + return p1 - p2; +} + __attribute__ ((noinline)) static int krava_3(struct thread *thread) { - return unwind_thread(thread); + struct thread *array[2] = {thread, thread}; + void *fp = &bsearch; + /* + * make _bsearch a volatile function pointer to + * prevent potential optimization, which may expand + * bsearch and call compare directly from this function, + * instead of libc shared object. + */ + void *(*volatile _bsearch)(void *, void *, size_t, + size_t, int (*)(void *, void *)); + + _bsearch = fp; + _bsearch(array, &thread, 2, sizeof(struct thread **), compare); + return global_unwind_retval; } __attribute__ ((noinline)) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 614d5c4978ab..8d110dec393e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -187,7 +187,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", @@ -454,12 +454,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) * 30.00% 10.00% perf perf [.] cmd_record * 20.00% 0.00% bash libc [.] malloc * 10.00% 10.00% bash [kernel] [k] page_fault - * 10.00% 10.00% perf [kernel] [k] schedule - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% bash bash [.] xmalloc * 10.00% 10.00% perf [kernel] [k] page_fault - * 10.00% 10.00% perf libc [.] free * 10.00% 10.00% perf libc [.] malloc - * 10.00% 10.00% bash bash [.] xmalloc + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% perf libc [.] free + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -468,12 +468,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, }; symbol_conf.use_callchain = false; @@ -537,10 +537,13 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * malloc * main * - * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% bash bash [.] xmalloc * | - * --- schedule - * run_command + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc * main * * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open @@ -556,6 +559,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * * 10.00% 10.00% perf libc [.] free * | * --- free @@ -570,15 +579,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * - * 10.00% 10.00% bash bash [.] xmalloc - * | - * --- xmalloc - * malloc - * xmalloc <--- NOTE: there's a cycle - * malloc - * xmalloc - * main - * */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -587,12 +587,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, }; struct callchain_result expected_callchain[] = { { @@ -622,9 +622,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "bash", "main" }, }, }, { - 3, { { "[kernel]", "schedule" }, - { "perf", "run_command" }, - { "perf", "main" }, }, + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, }, { 3, { { "[kernel]", "sys_perf_event_open" }, @@ -638,6 +641,11 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "main" }, }, }, { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { 4, { { "libc", "free" }, { "perf", "cmd_record" }, { "perf", "run_command" }, @@ -649,14 +657,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "run_command" }, { "perf", "main" }, }, }, - { - 6, { { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "bash", "main" }, }, - }, }; symbol_conf.use_callchain = true; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 74f257a81265..59e53db7914c 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -138,7 +138,7 @@ int test__hists_filter(void) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a748f2be1222..f5547610da02 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -152,7 +152,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -252,7 +252,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -306,7 +306,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -384,7 +384,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -487,7 +487,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e6bb04b5b09b..788506eef567 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -550,7 +550,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = !!rb_next(node); + need_percent = node && rb_next(node); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index dc0d095f318c..482adae3cc44 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -204,6 +204,9 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; + if (a->thread != b->thread || !symbol_conf.use_callchain) + return 0; + ret = b->callchain->max_depth - a->callchain->max_depth; } return ret; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 2f612562978c..3c38f25b1695 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,5 +1,8 @@ #include <signal.h> #include <stdbool.h> +#ifdef HAVE_BACKTRACE_SUPPORT +#include <execinfo.h> +#endif #include "../../util/cache.h" #include "../../util/debug.h" @@ -88,6 +91,25 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } +#ifdef HAVE_BACKTRACE_SUPPORT +static void ui__signal_backtrace(int sig) +{ + void *stackdump[32]; + size_t size; + + ui__exit(false); + psignal(sig, "perf"); + + printf("-------- backtrace --------\n"); + size = backtrace(stackdump, ARRAY_SIZE(stackdump)); + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); + + exit(0); +} +#else +# define ui__signal_backtrace ui__signal +#endif + static void ui__signal(int sig) { ui__exit(false); @@ -122,8 +144,8 @@ int ui__init(void) ui_browser__init(); tui_progress__init(); - signal(SIGSEGV, ui__signal); - signal(SIGFPE, ui__signal); + signal(SIGSEGV, ui__signal_backtrace); + signal(SIGFPE, ui__signal_backtrace); signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0784a9420528..cadbdc90a5cb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -116,11 +116,6 @@ struct annotation { struct annotated_source *src; }; -struct sannotation { - struct annotation annotation; - struct symbol symbol; -}; - static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { return (((void *)¬es->src->histograms) + @@ -129,8 +124,7 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i static inline struct annotation *symbol__annotation(struct symbol *sym) { - struct sannotation *a = container_of(sym, struct sannotation, symbol); - return &a->annotation; + return (void *)sym - symbol_conf.priv_size; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 5cf9e1b5989d..d04d770d90f6 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -71,7 +71,9 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +#ifndef __UCLIBC__ /* Matches the libc/libbsd function attribute so we declare this unconditionally: */ extern size_t strlcpy(char *dest, const char *src, size_t size); +#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 64b377e591e4..14e7a123d43b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -841,3 +841,33 @@ char *callchain_list__sym_name(struct callchain_list *cl, return bf; } + +static void free_callchain_node(struct callchain_node *node) +{ + struct callchain_list *list, *tmp; + struct callchain_node *child; + struct rb_node *n; + + list_for_each_entry_safe(list, tmp, &node->val, list) { + list_del(&list->list); + free(list); + } + + n = rb_first(&node->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &node->rb_root_in); + + free_callchain_node(child); + free(child); + } +} + +void free_callchain(struct callchain_root *root) +{ + if (!symbol_conf.use_callchain) + return; + + free_callchain_node(&root->node); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index dbc08cf5f970..c0ec1acc38e4 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -198,4 +198,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); +void free_callchain(struct callchain_root *root); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6e88b9e395df..182395546ddc 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "ui/progress.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -303,7 +304,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, size_t callchain_size = 0; struct hist_entry *he; - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + if (symbol_conf.use_callchain) callchain_size = sizeof(struct callchain_root); he = zalloc(sizeof(*he) + callchain_size); @@ -736,7 +737,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &callchain_cursor, sample->period); + hist_entry__append_callchain(he, sample); /* * We need to re-initialize the cursor since callchain_append() @@ -809,7 +810,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &cursor, sample->period); + if (symbol_conf.use_callchain) + callchain_append(he->callchain, &cursor, sample->period); return 0; } @@ -945,6 +947,7 @@ void hist_entry__free(struct hist_entry *he) zfree(&he->mem_info); zfree(&he->stat_acc); free_srcline(he->srcline); + free_callchain(he->callchain); free(he); } @@ -987,6 +990,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, else p = &(*p)->rb_right; } + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); @@ -1024,7 +1028,10 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (!sort__need_collapse) return; + hists->nr_entries = 0; + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); while (next) { @@ -1119,7 +1126,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists) +void hists__output_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; @@ -1148,6 +1155,9 @@ void hists__output_resort(struct hists *hists) if (!n->filtered) hists__calc_col_len(hists, n); + + if (prog) + ui_progress__update(prog, 1); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d0ef9a19a744..46bd50344f85 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -121,7 +121,7 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); -void hists__output_resort(struct hists *hists); +void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); diff --git a/tools/perf/util/hweight.c b/tools/perf/util/hweight.c deleted file mode 100644 index 5c1d0d099f0d..000000000000 --- a/tools/perf/util/hweight.c +++ /dev/null @@ -1,31 +0,0 @@ -#include <linux/bitops.h> - -/** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh - * - * The Hamming Weight of a number is the total number of bits set in it. - */ - -unsigned int hweight32(unsigned int w) -{ - unsigned int res = w - ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res + (res >> 4)) & 0x0F0F0F0F; - res = res + (res >> 8); - return (res + (res >> 16)) & 0x000000FF; -} - -unsigned long hweight64(__u64 w) -{ -#if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); -#elif BITS_PER_LONG == 64 - __u64 res = w - ((w >> 1) & 0x5555555555555555ul); - res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); - res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; - res = res + (res >> 8); - res = res + (res >> 16); - return (res + (res >> 32)) & 0x00000000000000FFul; -#endif -} diff --git a/tools/perf/util/include/asm/hweight.h b/tools/perf/util/include/asm/hweight.h deleted file mode 100644 index 36cf26d434a5..000000000000 --- a/tools/perf/util/include/asm/hweight.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PERF_HWEIGHT_H -#define PERF_HWEIGHT_H - -#include <linux/types.h> -unsigned int hweight32(unsigned int w); -unsigned long hweight64(__u64 w); - -#endif /* PERF_HWEIGHT_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 94de3e48b490..1bca3a9f2b16 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -389,7 +389,6 @@ static struct thread *__machine__findnew_thread(struct machine *machine, if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, &machine->threads); - machine->last_match = th; /* * We have to initialize map_groups separately @@ -400,9 +399,12 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { + rb_erase(&th->rb_node, &machine->threads); thread__delete(th); return NULL; } + + machine->last_match = th; } return th; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28eb1417cb2a..94a717bf007d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -495,9 +495,11 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found.\n", + pr_warning("Probe point '%s' not found in debuginfo.\n", synthesize_perf_probe_point(&pev->point)); - return -ENOENT; + if (need_dwarf) + return -ENOENT; + return 0; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); @@ -2050,9 +2052,11 @@ static int write_probe_trace_event(int fd, struct probe_trace_event *tev) pr_debug("Writing event: %s\n", buf); if (!probe_event_dry_run) { ret = write(fd, buf, strlen(buf)); - if (ret <= 0) + if (ret <= 0) { + ret = -errno; pr_warning("Failed to write event: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); + } } free(buf); return ret; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c7918f83b300..b5247d777f0e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -989,8 +989,24 @@ static int debuginfo__find_probes(struct debuginfo *dbg, int ret = 0; #if _ELFUTILS_PREREQ(0, 142) + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + /* Get the call frame information from this dwarf */ - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg)); + elf = dwarf_getelf(dbg->dbg); + if (elf == NULL) + return -EINVAL; + + if (gelf_getehdr(elf, &ehdr) == NULL) + return -EINVAL; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) { + pf->cfi = dwarf_getcfi_elf(elf); + } else { + pf->cfi = dwarf_getcfi(dbg->dbg); + } #endif off = 0; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 16a475a7d492..6c6a6953fa93 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,7 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -util/hweight.c +../../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 371219a6daf1..6edf535f65c2 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -185,6 +185,28 @@ static u64 elf_section_offset(int fd, const char *name) return offset; } +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int elf_is_exec(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + int retval = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out; + + retval = (ehdr.e_type == ET_EXEC); + +out: + elf_end(elf); + pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); + return retval; +} +#endif + struct table_entry { u32 start_ip_offset; u32 fde_offset; @@ -322,8 +344,12 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, #ifndef NO_LIBUNWIND_DEBUG_FRAME /* Check the .debug_frame section for unwinding info */ if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + int fd = dso__data_fd(map->dso, ui->machine); + int is_exec = elf_is_exec(fd, map->dso->name); + unw_word_t base = is_exec ? 0 : map->start; + memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, + if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, map->start, map->end)) return dwarf_search_unwind_table(as, ip, &di, pi, need_unwind_info, arg); |