diff options
author | Kan Liang <kan.liang@intel.com> | 2015-01-05 19:23:04 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-02-18 17:16:17 +0100 |
commit | aad2b21c151273fa7abc419dac51a980eff1dd17 (patch) | |
tree | 6408263bd2daf71567178e4fe07d094de934414c | |
parent | perf/x86/intel: Expose LBR callstack to user space tooling (diff) | |
download | linux-aad2b21c151273fa7abc419dac51a980eff1dd17.tar.xz linux-aad2b21c151273fa7abc419dac51a980eff1dd17.zip |
perf tools: Enable LBR call stack support
Currently, there are two call chain recording options, fp and dwarf.
Haswell has a new feature that utilizes the existing LBR facility to
record call chains. Kernel side LBR support code provides this as a
third option to record call chains. This patch enables the lbr call
stack support on the tooling side.
LBR call stack has some limitations:
- It reuses current LBR facility, so LBR call stack and branch record
can not be enabled at the same time.
- It is only available for user-space callchains.
However, it also offers some advantages:
- LBR call stack can work on user apps which don't have frame-pointers
or dwarf debug info compiled. It is a good alternative when nothing
else works.
Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jacob Shin <jacob.w.shin@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masanari Iida <standby24x7@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rodrigo Campos <rodrigo@sdfg.com.ar>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 8 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 6 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 2 | ||||
-rw-r--r-- | tools/perf/util/callchain.c | 8 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 21 |
6 files changed, 40 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e977459c51..1c7e50f62b1f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -115,13 +115,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab3434052..d0d02a811ecd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -658,7 +658,7 @@ error: static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -751,9 +751,9 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094e228b..0ba5f07906fb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a123d43b..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1acc38e4..6033a0a212ca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90e20a0..f93e5208c762 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. " + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; |