From cc8b7c2bf553151a579a8009020875faa1d43e29 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 15:26:17 -0300 Subject: perf thread: Adopt resolve_callchain method from machine Shortening function signature length too, since a thread's machine can be obtained from thread->mg->machine, no need to pass thread, machine. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5wb6css280ty0cel5p0zo2b1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c84d3f8dcb75..00229809a904 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -754,8 +754,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || sort__has_parent) { - return machine__resolve_callchain(al->machine, evsel, al->thread, - sample, parent, al, max_stack); + return thread__resolve_callchain(al->thread, evsel, sample, + parent, al, max_stack); } return 0; } -- cgit v1.2.3 From 2989ccaac48f8c3da7f77101bbf98e0ea8773d83 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:23 -0800 Subject: perf callchain: Use a common function to resolve symbol or name Refactor the duplicated code to resolve the symbol name or the address of a symbol into a single function. Used in next patch to add common functionality. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 17 ----------------- tools/perf/ui/gtk/hists.c | 11 +---------- tools/perf/ui/stdio/hist.c | 23 +++++++++-------------- tools/perf/util/callchain.c | 19 +++++++++++++++++++ tools/perf/util/callchain.h | 3 +++ 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cfb976b3de3a..12c17c5a3d68 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -463,23 +463,6 @@ out: return key; } -static char *callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize, bool show_dso) -{ - int printed; - - if (cl->ms.sym) - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); - else - printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); - - if (show_dso) - scnprintf(bf + printed, bfsize - printed, " %s", - cl->ms.map ? 
cl->ms.map->dso->short_name : "unknown"); - - return bf; -} - struct callchain_print_arg { /* for hists browser */ off_t row_offset; diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index fc654fb77ace..4b3585eed1e8 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -89,15 +89,6 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_acc; } -static void callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize) -{ - if (cl->ms.sym) - scnprintf(bf, bfsize, "%s", cl->ms.sym->name); - else - scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); -} - static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, GtkTreeIter *parent, int col, u64 total) { @@ -128,7 +119,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, scnprintf(buf, sizeof(buf), "%5.2f%%", percent); gtk_tree_store_set(store, &iter, 0, buf, -1); - callchain_list__sym_name(chain, buf, sizeof(buf)); + callchain_list__sym_name(chain, buf, sizeof(buf), false); gtk_tree_store_set(store, &iter, col, buf, -1); if (need_new_parent) { diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 15b451acbde6..dfcbc90146ef 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -41,6 +41,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, { int i; size_t ret = 0; + char bf[1024]; ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { @@ -56,11 +57,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, } else ret += fprintf(fp, "%s", " "); } - if (chain->ms.sym) - ret += fprintf(fp, "%s\n", chain->ms.sym->name); - else - ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); - + fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp); + fputc('\n', fp); return ret; } @@ -168,6 +166,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, struct rb_node *node; int i = 
0; int ret = 0; + char bf[1024]; /* * If have one single callchain root, don't bother printing @@ -196,10 +195,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, } else ret += callchain__fprintf_left_margin(fp, left_margin); - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf), + false)); if (++entries_printed == callchain_param.print_limit) break; @@ -219,6 +216,7 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node, { struct callchain_list *chain; size_t ret = 0; + char bf[1024]; if (!node) return 0; @@ -229,11 +227,8 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node, list_for_each_entry(chain, &node->val, list) { if (chain->ip >= PERF_CONTEXT_MAX) continue; - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); + ret += fprintf(fp, " %s\n", callchain_list__sym_name(chain, + bf, sizeof(bf), false)); } return ret; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 00229809a904..38da69c8c1ff 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -808,3 +808,22 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * out: return 1; } + +char *callchain_list__sym_name(struct callchain_list *cl, + char *bf, size_t bfsize, bool show_dso) +{ + int printed; + + if (cl->ms.sym) { + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + } else + printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); + + if (show_dso) + scnprintf(bf + printed, bfsize - printed, " %s", + cl->ms.map ? 
+ cl->ms.map->dso->short_name : + "unknown"); + + return bf; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3caccc2c173c..3e1ed15d11f1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -193,4 +193,7 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif +char *callchain_list__sym_name(struct callchain_list *cl, + char *bf, size_t bfsize, bool show_dso); + #endif /* __PERF_CALLCHAIN_H */ -- cgit v1.2.3 From 23f0981bbd89fcc1496d0490ec39ca7c91599e32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:24 -0800 Subject: perf callchain: Enable printing the srcline in the history For lbr-as-callgraph we need to see the line number in the history, because many LBR entries can be in a single function, and just showing the same function name many times is not useful. When the history code is configured to sort by address, also try to resolve the address to a file:srcline and display this in the browser. If that doesn't work still display the address. This can be also useful without LBRs for understanding which call in a large function (or in which inlined function) called something else. 
Contains fixes from Namhyung Kim v2: Refactor code into common function v3: Fix GTK build v4: Rebase Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 11 ++++++++++- tools/perf/util/callchain.h | 1 + tools/perf/util/srcline.c | 6 ++++-- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 38da69c8c1ff..b6624aeaaca9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -815,7 +815,16 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + if (callchain_param.key == CCKEY_ADDRESS && + cl->ms.map && !cl->srcline) + cl->srcline = get_srcline(cl->ms.map->dso, + map__rip_2objdump(cl->ms.map, + cl->ip)); + if (cl->srcline) + printed = scnprintf(bf, bfsize, "%s %s", + cl->ms.sym->name, cl->srcline); + else + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); } else printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3e1ed15d11f1..3f158474c892 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -70,6 +70,7 @@ extern struct callchain_param callchain_param; struct callchain_list { u64 ip; struct map_symbol ms; + char *srcline; struct list_head list; }; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 77c180637138..ac877f96fed7 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -258,7 +258,7 @@ char *get_srcline(struct dso *dso, unsigned long addr) const char *dso_name; if (!dso->has_srcline) - return SRCLINE_UNKNOWN; + goto out; if (dso->symsrc_filename) dso_name = dso->symsrc_filename; @@ -289,7 +289,9 @@ out: 
dso->has_srcline = 0; dso__free_a2l(dso); } - return SRCLINE_UNKNOWN; + if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) + return SRCLINE_UNKNOWN; + return srcline; } void free_srcline(char *srcline) -- cgit v1.2.3 From 85c116a6cb91a5c09b7a6c95ffc6a6cbd32cd237 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:27 -0800 Subject: perf callchain: Make get_srcline fall back to sym+offset When the source line is not found fall back to sym + offset. This is generally much more useful than a raw address. For this we need to pass in the symbol from the caller. For some callers it's awkward to compute, so we stay at the old behaviour. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-10-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- tools/perf/util/callchain.c | 3 ++- tools/perf/util/map.c | 2 +- tools/perf/util/sort.c | 6 ++++-- tools/perf/util/srcline.c | 11 +++++++++-- tools/perf/util/util.h | 4 +++- 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e5670f1af737..79999ceaf2be 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1192,7 +1192,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - src_line->path = get_srcline(map->dso, offset); + src_line->path = get_srcline(map->dso, offset, NULL, false); insert_source_line(&tmp_root, src_line); next: diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index b6624aeaaca9..517ed84db97a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -819,7 +819,8 @@ char *callchain_list__sym_name(struct callchain_list *cl, cl->ms.map && !cl->srcline) cl->srcline = get_srcline(cl->ms.map->dso, map__rip_2objdump(cl->ms.map, - cl->ip)); + cl->ip), + 
cl->ms.sym, false); if (cl->srcline) printed = scnprintf(bf, bfsize, "%s %s", cl->ms.sym->name, cl->srcline); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 040a785c857b..62ca9f2607d5 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -360,7 +360,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr)); + map__rip_2objdump(map, addr), NULL, true); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 82a5596241a7..9139dda9f9a3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -291,7 +291,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = left->ms.map; left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip)); + map__rip_2objdump(map, left->ip), + left->ms.sym, true); } } if (!right->srcline) { @@ -300,7 +301,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = right->ms.map; right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip)); + map__rip_2objdump(map, right->ip), + right->ms.sym, true); } } return strcmp(right->srcline, left->srcline); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ac877f96fed7..e73b6a5c9e0f 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -8,6 +8,8 @@ #include "util/util.h" #include "util/debug.h" +#include "symbol.h" + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -250,7 +252,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, unsigned long addr) +char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, + bool show_sym) { char *file = NULL; unsigned line = 0; @@ -289,7 +292,11 @@ out: dso->has_srcline = 0; dso__free_a2l(dso); 
} - if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) + if (sym) { + if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "", + addr - sym->start) < 0) + return SRCLINE_UNKNOWN; + } else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; return srcline; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76d23d83eae5..419bee030f83 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -337,8 +337,10 @@ static inline int path__join3(char *bf, size_t size, } struct dso; +struct symbol; -char *get_srcline(struct dso *dso, unsigned long addr); +char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, + bool show_sym); void free_srcline(char *srcline); int filename__read_int(const char *filename, int *value); -- cgit v1.2.3 From 8b7bad58efb7e3aaff60f7c1fa4361fb8c23181d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:20 -0800 Subject: perf callchain: Support handling complete branch stacks as histograms Currently branch stacks can be only shown as edge histograms for individual branches. I never found this display particularly useful. This implements an alternative mode that creates histograms over complete branch traces, instead of individual branches, similar to how normal callgraphs are handled. This is done by putting it in front of the normal callgraph and then using the normal callgraph histogram infrastructure to unify them. This way in complex functions we can understand the control flow that led to a particular sample, and may even see some control flow in the caller for short functions. 
Example (simplified, of course for such simple code this is usually not needed), please run this after the whole patchkit is in, as at this point in the patch order there is no --branch-history, that will be added in a patch after this one: tcall.c: volatile a = 10000, b = 100000, c; __attribute__((noinline)) f2() { c = a / b; } __attribute__((noinline)) f1() { f2(); f2(); } main() { int i; for (i = 0; i < 1000000; i++) f1(); } % perf record -b -g ./tsrc/tcall [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.044 MB perf.data (~1923 samples) ] % perf report --no-children --branch-history ... 54.91% tcall.c:6 [.] f2 tcall | |--65.53%-- f2 tcall.c:5 | | | |--70.83%-- f1 tcall.c:11 | | f1 tcall.c:10 | | main tcall.c:18 | | main tcall.c:18 | | main tcall.c:17 | | main tcall.c:17 | | f1 tcall.c:13 | | f1 tcall.c:13 | | f2 tcall.c:7 | | f2 tcall.c:5 | | f1 tcall.c:12 | | f1 tcall.c:12 | | f2 tcall.c:7 | | f2 tcall.c:5 | | f1 tcall.c:11 | | | --29.17%-- f1 tcall.c:12 | f1 tcall.c:12 | f2 tcall.c:7 | f2 tcall.c:5 | f1 tcall.c:11 | f1 tcall.c:10 | main tcall.c:18 | main tcall.c:18 | main tcall.c:17 | main tcall.c:17 | f1 tcall.c:13 | f1 tcall.c:13 | f2 tcall.c:7 | f2 tcall.c:5 | f1 tcall.c:12 The default output is unchanged. This is only implemented in perf report, no change to record or anywhere else. This adds the basic code to report: - add a new "branch" option to the -g option parser to enable this mode - when the flag is set include the LBR into the callstack in machine.c. The rest of the history code is unchanged and doesn't know the difference between LBR entry and normal call entry. - detect overlaps with the callchain - remove small loop duplicates in the LBR Current limitations: - The LBR flags (mispredict etc.) are not shown in the history and LBR entries have no special marker. - It would be nice if annotate marked the LBR entries somehow (e.g. with arrows) v2: Various fixes. v3: Merge further patches into this one. 
Fix white space. v4: Improve manpage. Address review feedback. v5: Rename functions. Better error message without -g. Fix crash without -b. v6: Rebase v7: Rebase. Use NO_ENTRY in memset. v8: Port to latest tip. Move add_callchain_ip to separate patch. Skip initial entries in callchain. Minor cleanups. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 7 +- tools/perf/builtin-report.c | 4 +- tools/perf/util/callchain.c | 4 + tools/perf/util/callchain.h | 1 + tools/perf/util/machine.c | 126 ++++++++++++++++++++++++++++--- tools/perf/util/symbol.h | 3 +- 6 files changed, 132 insertions(+), 13 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..22706beffabc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -159,7 +159,7 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order[,key]]:: +-g [type,min[,limit],order[,key][,branch]]:: --call-graph:: Display call chains using type, min percent threshold, optional print limit and order. @@ -177,6 +177,11 @@ OPTIONS - function: compare on functions - address: compare on individual code addresses + branch can be: + - branch: include last branch information in callgraph + when available. Usually more convenient to use --branch-history + for this. + Default: fractal,0.5,callee,function. 
--children:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..410d44fac64f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -637,8 +637,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. 
" "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 517ed84db97a..cf524a35cc84 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) callchain_param.key = CCKEY_ADDRESS; return 0; } + if (!strncmp(value, "branch", strlen(value))) { + callchain_param.branch_callstack = 1; + return 0; + } return -1; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3f158474c892..dbc08cf5f970 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -63,6 +63,7 @@ struct callchain_param { sort_chain_func_t sort; enum chain_order order; enum chain_key key; + bool branch_callstack; }; extern struct callchain_param callchain_param; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b75b487574c7..15dd0a9691ce 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -12,6 +12,7 @@ #include #include #include "unwind.h" +#include "linux/hash.h" static void dsos__init(struct dsos *dsos) { @@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + if (cpumode == -1) + thread__find_cpumode_addr_location(thread, MAP__FUNCTION, + ip, &al); + else + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return bi; } +#define CHASHSZ 127 +#define CHASHBITS 7 +#define NO_ENTRY 0xff + +#define PERF_MAX_BRANCH_DEPTH 127 + +/* Remove loops. 
*/ +static int remove_loops(struct branch_entry *l, int nr) +{ + int i, j, off; + unsigned char chash[CHASHSZ]; + + memset(chash, NO_ENTRY, sizeof(chash)); + + BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); + + for (i = 0; i < nr; i++) { + int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; + + /* no collision handling for now */ + if (chash[h] == NO_ENTRY) { + chash[h] = i; + } else if (l[chash[h]].from == l[i].from) { + bool is_loop = true; + /* check if it is a real loop */ + off = 0; + for (j = chash[h]; j < i && i + off < nr; j++, off++) + if (l[j].from != l[i + off].from) { + is_loop = false; + break; + } + if (is_loop) { + memmove(l + i, l + i + off, + (nr - (i + off)) * sizeof(*l)); + nr -= off; + } + } + } + return nr; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain, + struct branch_stack *branch, struct symbol **parent, struct addr_location *root_al, int max_stack) @@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i; int j; int err; - int skip_idx __maybe_unused; + int skip_idx = -1; + int first_call = 0; + + /* + * Based on DWARF debug information, some architectures skip + * a callchain entry saved by the kernel. + */ + if (chain->nr < PERF_MAX_STACK_DEPTH) + skip_idx = arch_skip_callchain_idx(thread, chain); callchain_cursor_reset(&callchain_cursor); + /* + * Add branches to call stack for easier browsing. This gives + * more context for a sample than just the callers. + * + * This uses individual histograms of paths compared to the + * aggregated histograms the normal LBR mode uses. + * + * Limitations for now: + * - No extra filters + * - No annotations (should annotate somehow) + */ + + if (branch && callchain_param.branch_callstack) { + int nr = min(max_stack, (int)branch->nr); + struct branch_entry be[nr]; + + if (branch->nr > PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted branch chain. 
skipping...\n"); + goto check_calls; + } + + for (i = 0; i < nr; i++) { + if (callchain_param.order == ORDER_CALLEE) { + be[i] = branch->entries[i]; + /* + * Check for overlap into the callchain. + * The return address is one off compared to + * the branch entry. To adjust for this + * assume the calling instruction is not longer + * than 8 bytes. + */ + if (i == skip_idx || + chain->ips[first_call] >= PERF_CONTEXT_MAX) + first_call++; + else if (be[i].from < chain->ips[first_call] && + be[i].from >= chain->ips[first_call] - 8) + first_call++; + } else + be[i] = branch->entries[branch->nr - i - 1]; + } + + nr = remove_loops(be, nr); + + for (i = 0; i < nr; i++) { + err = add_callchain_ip(thread, parent, root_al, + -1, be[i].to); + if (!err) + err = add_callchain_ip(thread, parent, root_al, + -1, be[i].from); + if (err == -EINVAL) + break; + if (err) + return err; + } + chain_nr -= nr; + } + +check_calls: if (chain->nr > PERF_MAX_STACK_DEPTH) { pr_warning("corrupted callchain. skipping...\n"); return 0; } - /* - * Based on DWARF debug information, some architectures skip - * a callchain entry saved by the kernel. - */ - skip_idx = arch_skip_callchain_idx(thread, chain); - - for (i = 0; i < chain_nr; i++) { + for (i = first_call; i < chain_nr; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, int max_stack) { int ret = thread__resolve_callchain_sample(thread, sample->callchain, + sample->branch_stack, parent, root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0b297c50f9d..9d602e9c6f59 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -102,7 +102,8 @@ struct symbol_conf { demangle, demangle_kernel, filter_relative, - show_hist_headers; + show_hist_headers, + branch_callstack; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3