diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-11-24 05:09:31 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-11-24 05:09:31 +0100 |
commit | 47414424c53a70eceb0fc6e0a35a31a2b763d5b2 (patch) | |
tree | 07979aa784313ba03712df2b85a3b3f71f1733d6 /tools | |
parent | Merge branch 'linus' into perf/core, to pick up fixes (diff) | |
parent | perf sched timehist: Add -V/--cpu-visual option (diff) | |
download | linux-47414424c53a70eceb0fc6e0a35a31a2b763d5b2.tar.xz linux-47414424c53a70eceb0fc6e0a35a31a2b763d5b2.zip |
Merge tag 'perf-core-for-mingo-20161123' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
New tool:
- 'perf sched timehist' provides an analysis of scheduling events.
Example usage:
perf sched record -- sleep 1
perf sched timehist
By default it shows the individual schedule events, including the wait
time (time between sched-out and next sched-in events for the task), the
task scheduling delay (time between wakeup and actually running) and run
time for the task:
time cpu task name wait time sch delay run time
[tid/pid] (msec) (msec) (msec)
-------- ------ ---------------- --------- --------- --------
1.874569 [0011] gcc[31949] 0.014 0.000 1.148
1.874591 [0010] gcc[31951] 0.000 0.000 0.024
1.874603 [0010] migration/10[59] 3.350 0.004 0.011
1.874604 [0011] <idle> 1.148 0.000 0.035
1.874723 [0005] <idle> 0.016 0.000 1.383
1.874746 [0005] gcc[31949] 0.153 0.078 0.022
...
Times are in msec.usec. (David Ahern, Namhyung Kim)
Improvements:
- Make 'perf c2c report' support -f/--force, to allow skipping the
ownership check for root users, for instance, just like the other
tools (Jiri Olsa)
- Allow sorting cachelines by total number of HITMs, in addition to
local and remote numbers (Jiri Olsa)
Fixes:
- Make sure errors aren't suppressed by the TUI reset at the end of
a 'perf c2c report' session (Jiri Olsa)
Infrastructure changes:
- Initial work on having the annotate code better support multiple
architectures, including the ability to cross-annotate, i.e. to
annotate perf.data files collected on an ARM system on an x86_64
workstation (Arnaldo Carvalho de Melo, Ravi Bangoria, Kim Phillips)
- Use USEC_PER_SEC instead of a hard-coded number in libtraceevent (Steven Rostedt)
- Add retrieval of preempt count and latency flags in libtraceevent (Steven Rostedt)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/lib/traceevent/event-parse.c | 41 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.h | 5 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-c2c.txt | 8 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-sched.txt | 66 | ||||
-rw-r--r-- | tools/perf/arch/arm/annotate/instructions.c | 90 | ||||
-rw-r--r-- | tools/perf/arch/x86/annotate/instructions.c | 78 | ||||
-rw-r--r-- | tools/perf/builtin-c2c.c | 80 | ||||
-rw-r--r-- | tools/perf/builtin-sched.c | 914 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 2 | ||||
-rw-r--r-- | tools/perf/ui/browsers/annotate.c | 2 | ||||
-rw-r--r-- | tools/perf/ui/gtk/annotate.c | 2 | ||||
-rw-r--r-- | tools/perf/util/annotate.c | 251 | ||||
-rw-r--r-- | tools/perf/util/annotate.h | 6 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 6 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel_fprintf.c | 12 | ||||
-rw-r--r-- | tools/perf/util/mem-events.c | 12 | ||||
-rw-r--r-- | tools/perf/util/mem-events.h | 1 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 3 | ||||
-rw-r--r-- | tools/perf/util/symbol_fprintf.c | 11 |
20 files changed, 1406 insertions, 185 deletions
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 664c90c8e22b..14a4f623c1a5 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -33,6 +33,7 @@ #include <stdint.h> #include <limits.h> #include <linux/string.h> +#include <linux/time64.h> #include <netinet/in.h> #include "event-parse.h" @@ -5191,11 +5192,11 @@ struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type } /** - * pevent_data_pid - parse the PID from raw data + * pevent_data_pid - parse the PID from record * @pevent: a handle to the pevent * @rec: the record to parse * - * This returns the PID from a raw data. + * This returns the PID from a record. */ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec) { @@ -5203,6 +5204,32 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec) } /** + * pevent_data_prempt_count - parse the preempt count from the record + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the preempt count from a record. + */ +int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec) +{ + return parse_common_pc(pevent, rec->data); +} + +/** + * pevent_data_flags - parse the latency flags from the record + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the latency flags from a record. + * + * Use trace_flag_type enum for the flags (see event-parse.h). 
+ */ +int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec) +{ + return parse_common_flags(pevent, rec->data); +} + +/** * pevent_data_comm_from_pid - return the command line from PID * @pevent: a handle to the pevent * @pid: the PID of the task to search for @@ -5424,8 +5451,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, use_usec_format = is_timestamp_in_us(pevent->trace_clock, use_trace_clock); if (use_usec_format) { - secs = record->ts / NSECS_PER_SEC; - nsecs = record->ts - secs * NSECS_PER_SEC; + secs = record->ts / NSEC_PER_SEC; + nsecs = record->ts - secs * NSEC_PER_SEC; } if (pevent->latency_format) { @@ -5437,10 +5464,10 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, usecs = nsecs; p = 9; } else { - usecs = (nsecs + 500) / NSECS_PER_USEC; + usecs = (nsecs + 500) / NSEC_PER_USEC; /* To avoid usecs larger than 1 sec */ - if (usecs >= 1000000) { - usecs -= 1000000; + if (usecs >= USEC_PER_SEC) { + usecs -= USEC_PER_SEC; secs++; } p = 6; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 9ffde377e89d..7aae746ec2fe 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -172,9 +172,6 @@ struct pevent_plugin_option { #define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS) #define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS) -#define NSECS_PER_SEC 1000000000ULL -#define NSECS_PER_USEC 1000ULL - enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, @@ -712,6 +709,8 @@ void pevent_data_lat_fmt(struct pevent *pevent, int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); +int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec); +int pevent_data_flags(struct pevent *pevent, struct pevent_record 
*rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); struct cmdline; struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 21810d711f5f..3f06730c7f47 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -100,6 +100,14 @@ REPORT OPTIONS --show-all:: Show all captured HITM lines, with no regard to HITM % 0.0005 limit. +-f:: +--force:: + Don't do ownership validation. + +-d:: +--display:: + Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. + C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 1cc08cc47ac5..fb9e52d65fca 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) SYNOPSIS -------- [verse] -'perf sched' {record|latency|map|replay|script} +'perf sched' {record|latency|map|replay|script|timehist} DESCRIPTION ----------- -There are five variants of perf sched: +There are several variants of 'perf sched': 'perf sched record <command>' to record the scheduling events of an arbitrary workload. @@ -36,6 +36,30 @@ There are five variants of perf sched: are running on a CPU. A '*' denotes the CPU that had the event, and a dot signals an idle CPU. + 'perf sched timehist' provides an analysis of scheduling events.
+ + Example usage: + perf sched record -- sleep 1 + perf sched timehist + + By default it shows the individual schedule events, including the wait + time (time between sched-out and next sched-in events for the task), the + task scheduling delay (time between wakeup and actually running) and run + time for the task: + + time cpu task name wait time sch delay run time + [tid/pid] (msec) (msec) (msec) + -------------- ------ -------------------- --------- --------- --------- + 79371.874569 [0011] gcc[31949] 0.014 0.000 1.148 + 79371.874591 [0010] gcc[31951] 0.000 0.000 0.024 + 79371.874603 [0010] migration/10[59] 3.350 0.004 0.011 + 79371.874604 [0011] <idle> 1.148 0.000 0.035 + 79371.874723 [0005] <idle> 0.016 0.000 1.383 + 79371.874746 [0005] gcc[31949] 0.153 0.078 0.022 + ... + + Times are in msec.usec. + OPTIONS ------- -i:: @@ -66,6 +90,44 @@ OPTIONS for 'perf sched map' --color-pids:: Highlight the given pids. +OPTIONS for 'perf sched timehist' +--------------------------------- +-k:: +--vmlinux=<file>:: + vmlinux pathname + +--kallsyms=<file>:: + kallsyms pathname + +-g:: +--no-call-graph:: + Do not display call chains if present. + +--max-stack:: + Maximum number of functions to display in backtrace, default 5. + +-s:: +--summary:: + Show only a summary of scheduling by thread with min, max, and average + run times (in sec) and relative stddev. + +-S:: +--with-summary:: + Show all scheduling events followed by a summary by thread with min, + max, and average run times (in sec) and relative stddev. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + +-V:: +--cpu-visual:: + Show visual aid for sched switches by CPU: 'i' marks idle time, + 's' are scheduler events. + +-w:: +--wakeups:: + Show wakeup events. 
+ SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c new file mode 100644 index 000000000000..d67b8aa26274 --- /dev/null +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -0,0 +1,90 @@ +static struct ins arm__instructions[] = { + { .name = "add", .ops = &mov_ops, }, + { .name = "addl", .ops = &mov_ops, }, + { .name = "addq", .ops = &mov_ops, }, + { .name = "addw", .ops = &mov_ops, }, + { .name = "and", .ops = &mov_ops, }, + { .name = "b", .ops = &jump_ops, }, // might also be a call + { .name = "bcc", .ops = &jump_ops, }, + { .name = "bcs", .ops = &jump_ops, }, + { .name = "beq", .ops = &jump_ops, }, + { .name = "bge", .ops = &jump_ops, }, + { .name = "bgt", .ops = &jump_ops, }, + { .name = "bhi", .ops = &jump_ops, }, + { .name = "bl", .ops = &call_ops, }, + { .name = "bls", .ops = &jump_ops, }, + { .name = "blt", .ops = &jump_ops, }, + { .name = "blx", .ops = &call_ops, }, + { .name = "bne", .ops = &jump_ops, }, + { .name = "bts", .ops = &mov_ops, }, + { .name = "call", .ops = &call_ops, }, + { .name = "callq", .ops = &call_ops, }, + { .name = "cmp", .ops = &mov_ops, }, + { .name = "cmpb", .ops = &mov_ops, }, + { .name = "cmpl", .ops = &mov_ops, }, + { .name = "cmpq", .ops = &mov_ops, }, + { .name = "cmpw", .ops = &mov_ops, }, + { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "dec", .ops = &dec_ops, }, + { .name = "decl", .ops = &dec_ops, }, + { .name = "imul", .ops = &mov_ops, }, + { .name = "inc", .ops = &dec_ops, }, + { .name = "incl", .ops = &dec_ops, }, + { .name = "ja", .ops = &jump_ops, }, + { .name = "jae", .ops = &jump_ops, }, + { .name = "jb", .ops = &jump_ops, }, + { .name = "jbe", .ops = &jump_ops, }, + { .name = "jc", .ops = &jump_ops, }, + { .name = "jcxz", .ops = &jump_ops, }, + { .name = "je", .ops = &jump_ops, }, + { .name = "jecxz", .ops = &jump_ops, }, + { .name = "jg", .ops = &jump_ops, }, + { .name = "jge", .ops = &jump_ops, }, + { .name = 
"jl", .ops = &jump_ops, }, + { .name = "jle", .ops = &jump_ops, }, + { .name = "jmp", .ops = &jump_ops, }, + { .name = "jmpq", .ops = &jump_ops, }, + { .name = "jna", .ops = &jump_ops, }, + { .name = "jnae", .ops = &jump_ops, }, + { .name = "jnb", .ops = &jump_ops, }, + { .name = "jnbe", .ops = &jump_ops, }, + { .name = "jnc", .ops = &jump_ops, }, + { .name = "jne", .ops = &jump_ops, }, + { .name = "jng", .ops = &jump_ops, }, + { .name = "jnge", .ops = &jump_ops, }, + { .name = "jnl", .ops = &jump_ops, }, + { .name = "jnle", .ops = &jump_ops, }, + { .name = "jno", .ops = &jump_ops, }, + { .name = "jnp", .ops = &jump_ops, }, + { .name = "jns", .ops = &jump_ops, }, + { .name = "jnz", .ops = &jump_ops, }, + { .name = "jo", .ops = &jump_ops, }, + { .name = "jp", .ops = &jump_ops, }, + { .name = "jpe", .ops = &jump_ops, }, + { .name = "jpo", .ops = &jump_ops, }, + { .name = "jrcxz", .ops = &jump_ops, }, + { .name = "js", .ops = &jump_ops, }, + { .name = "jz", .ops = &jump_ops, }, + { .name = "lea", .ops = &mov_ops, }, + { .name = "lock", .ops = &lock_ops, }, + { .name = "mov", .ops = &mov_ops, }, + { .name = "movb", .ops = &mov_ops, }, + { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movl", .ops = &mov_ops, }, + { .name = "movq", .ops = &mov_ops, }, + { .name = "movslq", .ops = &mov_ops, }, + { .name = "movzbl", .ops = &mov_ops, }, + { .name = "movzwl", .ops = &mov_ops, }, + { .name = "nop", .ops = &nop_ops, }, + { .name = "nopl", .ops = &nop_ops, }, + { .name = "nopw", .ops = &nop_ops, }, + { .name = "or", .ops = &mov_ops, }, + { .name = "orl", .ops = &mov_ops, }, + { .name = "test", .ops = &mov_ops, }, + { .name = "testb", .ops = &mov_ops, }, + { .name = "testl", .ops = &mov_ops, }, + { .name = "xadd", .ops = &mov_ops, }, + { .name = "xbeginl", .ops = &jump_ops, }, + { .name = "xbeginq", .ops = &jump_ops, }, + { .name = "retq", .ops = &ret_ops, }, +}; diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c new file 
mode 100644 index 000000000000..c1625f256df3 --- /dev/null +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -0,0 +1,78 @@ +static struct ins x86__instructions[] = { + { .name = "add", .ops = &mov_ops, }, + { .name = "addl", .ops = &mov_ops, }, + { .name = "addq", .ops = &mov_ops, }, + { .name = "addw", .ops = &mov_ops, }, + { .name = "and", .ops = &mov_ops, }, + { .name = "bts", .ops = &mov_ops, }, + { .name = "call", .ops = &call_ops, }, + { .name = "callq", .ops = &call_ops, }, + { .name = "cmp", .ops = &mov_ops, }, + { .name = "cmpb", .ops = &mov_ops, }, + { .name = "cmpl", .ops = &mov_ops, }, + { .name = "cmpq", .ops = &mov_ops, }, + { .name = "cmpw", .ops = &mov_ops, }, + { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "dec", .ops = &dec_ops, }, + { .name = "decl", .ops = &dec_ops, }, + { .name = "imul", .ops = &mov_ops, }, + { .name = "inc", .ops = &dec_ops, }, + { .name = "incl", .ops = &dec_ops, }, + { .name = "ja", .ops = &jump_ops, }, + { .name = "jae", .ops = &jump_ops, }, + { .name = "jb", .ops = &jump_ops, }, + { .name = "jbe", .ops = &jump_ops, }, + { .name = "jc", .ops = &jump_ops, }, + { .name = "jcxz", .ops = &jump_ops, }, + { .name = "je", .ops = &jump_ops, }, + { .name = "jecxz", .ops = &jump_ops, }, + { .name = "jg", .ops = &jump_ops, }, + { .name = "jge", .ops = &jump_ops, }, + { .name = "jl", .ops = &jump_ops, }, + { .name = "jle", .ops = &jump_ops, }, + { .name = "jmp", .ops = &jump_ops, }, + { .name = "jmpq", .ops = &jump_ops, }, + { .name = "jna", .ops = &jump_ops, }, + { .name = "jnae", .ops = &jump_ops, }, + { .name = "jnb", .ops = &jump_ops, }, + { .name = "jnbe", .ops = &jump_ops, }, + { .name = "jnc", .ops = &jump_ops, }, + { .name = "jne", .ops = &jump_ops, }, + { .name = "jng", .ops = &jump_ops, }, + { .name = "jnge", .ops = &jump_ops, }, + { .name = "jnl", .ops = &jump_ops, }, + { .name = "jnle", .ops = &jump_ops, }, + { .name = "jno", .ops = &jump_ops, }, + { .name = "jnp", .ops = &jump_ops, }, + { .name = "jns", .ops = 
&jump_ops, }, + { .name = "jnz", .ops = &jump_ops, }, + { .name = "jo", .ops = &jump_ops, }, + { .name = "jp", .ops = &jump_ops, }, + { .name = "jpe", .ops = &jump_ops, }, + { .name = "jpo", .ops = &jump_ops, }, + { .name = "jrcxz", .ops = &jump_ops, }, + { .name = "js", .ops = &jump_ops, }, + { .name = "jz", .ops = &jump_ops, }, + { .name = "lea", .ops = &mov_ops, }, + { .name = "lock", .ops = &lock_ops, }, + { .name = "mov", .ops = &mov_ops, }, + { .name = "movb", .ops = &mov_ops, }, + { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movl", .ops = &mov_ops, }, + { .name = "movq", .ops = &mov_ops, }, + { .name = "movslq", .ops = &mov_ops, }, + { .name = "movzbl", .ops = &mov_ops, }, + { .name = "movzwl", .ops = &mov_ops, }, + { .name = "nop", .ops = &nop_ops, }, + { .name = "nopl", .ops = &nop_ops, }, + { .name = "nopw", .ops = &nop_ops, }, + { .name = "or", .ops = &mov_ops, }, + { .name = "orl", .ops = &mov_ops, }, + { .name = "test", .ops = &mov_ops, }, + { .name = "testb", .ops = &mov_ops, }, + { .name = "testl", .ops = &mov_ops, }, + { .name = "xadd", .ops = &mov_ops, }, + { .name = "xbeginl", .ops = &jump_ops, }, + { .name = "xbeginq", .ops = &jump_ops, }, + { .name = "retq", .ops = &ret_ops, }, +}; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c6d0dda594d9..4b419631753d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -91,6 +91,19 @@ struct perf_c2c { enum { DISPLAY_LCL, DISPLAY_RMT, + DISPLAY_TOT, + DISPLAY_MAX, +}; + +static const char *display_str[DISPLAY_MAX] = { + [DISPLAY_LCL] = "Local", + [DISPLAY_RMT] = "Remote", + [DISPLAY_TOT] = "Total", +}; + +static const struct option c2c_options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_END() }; static struct perf_c2c c2c; @@ -745,6 +758,10 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he) case DISPLAY_LCL: st = stats->lcl_hitm; tot = total->lcl_hitm; + break; + case DISPLAY_TOT: + st = 
stats->tot_hitm; + tot = total->tot_hitm; default: break; } @@ -1044,6 +1061,9 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, break; case DISPLAY_LCL: DISPLAY_HITM(lcl_hitm); + break; + case DISPLAY_TOT: + DISPLAY_HITM(tot_hitm); default: break; } @@ -1351,6 +1371,7 @@ static struct c2c_dimension dim_tot_loads = { static struct c2c_header percent_hitm_header[] = { [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), + [DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"), }; static struct c2c_dimension dim_percent_hitm = { @@ -1794,6 +1815,9 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) break; case DISPLAY_RMT: FILTER_HITM(rmt_hitm); + break; + case DISPLAY_TOT: + FILTER_HITM(tot_hitm); default: break; }; @@ -1809,8 +1833,9 @@ static inline int valid_hitm_or_store(struct hist_entry *he) bool has_hitm; c2c_he = container_of(he, struct c2c_hist_entry, he); - has_hitm = c2c.display == DISPLAY_LCL ? - c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm; + has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm : + c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm : + c2c_he->stats.rmt_hitm; return has_hitm || c2c_he->stats.store; } @@ -2095,7 +2120,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) first = false; } fprintf(out, " Cachelines sort on : %s HITMs\n", - c2c.display == DISPLAY_LCL ? "Local" : "Remote"); + display_str[c2c.display]); fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); } @@ -2250,7 +2275,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser, "Shared Data Cache Line Table " "(%lu entries, sorted on %s HITMs)", browser->nr_non_filtered_entries, - c2c.display == DISPLAY_LCL ? 
"local" : "remote"); + display_str[c2c.display]); return 0; } @@ -2387,9 +2412,11 @@ static int setup_callchain(struct perf_evlist *evlist) static int setup_display(const char *str) { - const char *display = str ?: "rmt"; + const char *display = str ?: "tot"; - if (!strcmp(display, "rmt")) + if (!strcmp(display, "tot")) + c2c.display = DISPLAY_TOT; + else if (!strcmp(display, "rmt")) c2c.display = DISPLAY_RMT; else if (!strcmp(display, "lcl")) c2c.display = DISPLAY_LCL; @@ -2474,6 +2501,8 @@ static int setup_coalesce(const char *coalesce, bool no_source) return -1; if (asprintf(&c2c.cl_resort, "offset,%s", + c2c.display == DISPLAY_TOT ? + "tot_hitm" : c2c.display == DISPLAY_RMT ? "rmt_hitm,lcl_hitm" : "lcl_hitm,rmt_hitm") < 0) @@ -2496,11 +2525,9 @@ static int perf_c2c__report(int argc, const char **argv) const char *display = NULL; const char *coalesce = NULL; bool no_source = false; - const struct option c2c_options[] = { + const struct option options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), OPT_STRING('i', "input", &input_name, "file", "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, @@ -2520,32 +2547,28 @@ static int perf_c2c__report(int argc, const char **argv) "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, callchain_default_opt), - OPT_STRING('d', "display", &display, NULL, "lcl,rmt"), + OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"), OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", "coalesce fields: pid,tid,iaddr,dso"), + OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_PARENT(c2c_options), OPT_END() }; int err = 0; - argc = parse_options(argc, argv, c2c_options, report_c2c_usage, + argc = parse_options(argc, argv, options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); if 
(argc) - usage_with_options(report_c2c_usage, c2c_options); + usage_with_options(report_c2c_usage, options); if (c2c.stats_only) c2c.use_stdio = true; - if (c2c.use_stdio) - use_browser = 0; - else - use_browser = 1; - - setup_browser(false); - if (!input_name || !strlen(input_name)) input_name = "perf.data"; - file.path = input_name; + file.path = input_name; + file.force = symbol_conf.force; err = setup_display(display); if (err) @@ -2568,6 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv) pr_debug("No memory for session\n"); goto out; } + err = setup_nodes(session); if (err) { pr_err("Failed setup nodes\n"); @@ -2587,6 +2611,13 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } + if (c2c.use_stdio) + use_browser = 0; + else + use_browser = 1; + + setup_browser(false); + err = perf_session__process_events(session); if (err) { pr_err("failed to process sample\n"); @@ -2605,6 +2636,7 @@ static int perf_c2c__report(int argc, const char **argv) "tot_loads," "ld_fbhit,ld_l1hit,ld_l2hit," "ld_lclhit,ld_rmthit", + c2c.display == DISPLAY_TOT ? "tot_hitm" : c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" ); @@ -2655,11 +2687,10 @@ static int perf_c2c__record(int argc, const char **argv) OPT_CALLBACK('e', "event", &event_set, "event", "event selector. 
Use 'perf mem record -e list' to list available events", parse_record_events), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"), + OPT_PARENT(c2c_options), OPT_END() }; @@ -2731,11 +2762,6 @@ static int perf_c2c__record(int argc, const char **argv) int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) { - const struct option c2c_options[] = { - OPT_INCR('v', "verbose", &verbose, "be more verbose"), - OPT_END() - }; - argc = parse_options(argc, argv, c2c_options, c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index fb3441211e4b..829468defa07 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -13,12 +13,15 @@ #include "util/cloexec.h" #include "util/thread_map.h" #include "util/color.h" +#include "util/stat.h" +#include "util/callchain.h" #include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/debug.h" +#include <linux/log2.h> #include <sys/prctl.h> #include <sys/resource.h> @@ -192,8 +195,40 @@ struct perf_sched { bool force; bool skip_merge; struct perf_sched_map map; + + /* options for timehist command */ + bool summary; + bool summary_only; + bool show_callchain; + unsigned int max_stack; + bool show_cpu_visual; + bool show_wakeups; + u64 skipped_samples; +}; + +/* per thread run time data */ +struct thread_runtime { + u64 last_time; /* time of previous sched in/out event */ + u64 dt_run; /* run time */ + u64 dt_wait; /* time between CPU access (off cpu) */ + u64 dt_delay; /* time between wakeup and sched-in */ + u64 ready_to_run; /* time of wakeup */ + + struct stats run_stats; + u64 total_run_time; }; +/* per event run time data */ +struct evsel_runtime { 
+ u64 *last_time; /* time this event was last seen per cpu */ + u32 ncpu; /* highest cpu slot allocated */ +}; + +/* track idle times per cpu */ +static struct thread **idle_threads; +static int idle_max_cpu; +static char idle_comm[] = "<idle>"; + static u64 get_nsecs(void) { struct timespec ts; @@ -1654,6 +1689,837 @@ out_delete: return rc; } +/* + * scheduling times are printed as msec.usec + */ +static inline void print_sched_time(unsigned long long nsecs, int width) +{ + unsigned long msecs; + unsigned long usecs; + + msecs = nsecs / NSEC_PER_MSEC; + nsecs -= msecs * NSEC_PER_MSEC; + usecs = nsecs / NSEC_PER_USEC; + printf("%*lu.%03lu ", width, msecs, usecs); +} + +/* + * returns runtime data for event, allocating memory for it the + * first time it is used. + */ +static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) +{ + struct evsel_runtime *r = evsel->priv; + + if (r == NULL) { + r = zalloc(sizeof(struct evsel_runtime)); + evsel->priv = r; + } + + return r; +} + +/* + * save last time event was seen per cpu + */ +static void perf_evsel__save_time(struct perf_evsel *evsel, + u64 timestamp, u32 cpu) +{ + struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + + if (r == NULL) + return; + + if ((cpu >= r->ncpu) || (r->last_time == NULL)) { + int i, n = __roundup_pow_of_two(cpu+1); + void *p = r->last_time; + + p = realloc(r->last_time, n * sizeof(u64)); + if (!p) + return; + + r->last_time = p; + for (i = r->ncpu; i < n; ++i) + r->last_time[i] = (u64) 0; + + r->ncpu = n; + } + + r->last_time[cpu] = timestamp; +} + +/* returns last time this event was seen on the given cpu */ +static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) +{ + struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + + if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) + return 0; + + return r->last_time[cpu]; +} + +static int comm_width = 20; + +static char *timehist_get_commstr(struct thread *thread) +{ + static char str[32]; + 
const char *comm = thread__comm_str(thread); + pid_t tid = thread->tid; + pid_t pid = thread->pid_; + int n; + + if (pid == 0) + n = scnprintf(str, sizeof(str), "%s", comm); + + else if (tid != pid) + n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); + + else + n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); + + if (n > comm_width) + comm_width = n; + + return str; +} + +static void timehist_header(struct perf_sched *sched) +{ + u32 ncpus = sched->max_cpu + 1; + u32 i, j; + + printf("%15s %6s ", "time", "cpu"); + + if (sched->show_cpu_visual) { + printf(" "); + for (i = 0, j = 0; i < ncpus; ++i) { + printf("%x", j++); + if (j > 15) + j = 0; + } + printf(" "); + } + + printf(" %-20s %9s %9s %9s", + "task name", "wait time", "sch delay", "run time"); + + printf("\n"); + + /* + * units row + */ + printf("%15s %-6s ", "", ""); + + if (sched->show_cpu_visual) + printf(" %*s ", ncpus, ""); + + printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + + /* + * separator + */ + printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); + + if (sched->show_cpu_visual) + printf(" %.*s ", ncpus, graph_dotted_line); + + printf(" %.20s %.9s %.9s %.9s", + graph_dotted_line, graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + printf("\n"); +} + +static void timehist_print_sample(struct perf_sched *sched, + struct perf_sample *sample, + struct addr_location *al, + struct thread *thread) +{ + struct thread_runtime *tr = thread__priv(thread); + u32 max_cpus = sched->max_cpu + 1; + char tstr[64]; + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s [%04d] ", tstr, sample->cpu); + + if (sched->show_cpu_visual) { + u32 i; + char c; + + printf(" "); + for (i = 0; i < max_cpus; ++i) { + /* flag idle times with 'i'; others are sched events */ + if (i == sample->cpu) + c = (thread->tid == 0) ? 
'i' : 's'; + else + c = ' '; + printf("%c", c); + } + printf(" "); + } + + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); + + print_sched_time(tr->dt_wait, 6); + print_sched_time(tr->dt_delay, 6); + print_sched_time(tr->dt_run, 6); + + if (sched->show_wakeups) + printf(" %-*s", comm_width, ""); + + if (thread->tid == 0) + goto out; + + if (sched->show_callchain) + printf(" "); + + sample__fprintf_sym(sample, al, 0, + EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | + EVSEL__PRINT_CALLCHAIN_ARROW, + &callchain_cursor, stdout); + +out: + printf("\n"); +} + +/* + * Explanation of delta-time stats: + * + * t = time of current schedule out event + * tprev = time of previous sched out event + * also time of schedule-in event for current task + * last_time = time of last sched change event for current task + * (i.e, time process was last scheduled out) + * ready_to_run = time of wakeup for current task + * + * -----|------------|------------|------------|------ + * last ready tprev t + * time to run + * + * |-------- dt_wait --------| + * |- dt_delay -|-- dt_run --| + * + * dt_run = run time of current task + * dt_wait = time between last schedule out event for task and tprev + * represents time spent off the cpu + * dt_delay = time between wakeup and schedule-in of task + */ + +static void timehist_update_runtime_stats(struct thread_runtime *r, + u64 t, u64 tprev) +{ + r->dt_delay = 0; + r->dt_wait = 0; + r->dt_run = 0; + if (tprev) { + r->dt_run = t - tprev; + if (r->ready_to_run) { + if (r->ready_to_run > tprev) + pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); + else + r->dt_delay = tprev - r->ready_to_run; + } + + if (r->last_time > tprev) + pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); + else if (r->last_time) + r->dt_wait = tprev - r->last_time; + } + + update_stats(&r->run_stats, r->dt_run); + r->total_run_time += r->dt_run; +} + +static bool is_idle_sample(struct perf_sched *sched, + 
struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct thread *thread; + struct callchain_cursor *cursor = &callchain_cursor; + + /* pid 0 == swapper == idle task */ + if (sample->pid == 0) + return true; + + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { + if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) + return true; + } + + /* want main thread for process - has maps */ + thread = machine__findnew_thread(machine, sample->pid, sample->pid); + if (thread == NULL) { + pr_debug("Failed to get thread for pid %d.\n", sample->pid); + return false; + } + + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return false; + + if (thread__resolve_callchain(thread, cursor, evsel, sample, + NULL, NULL, sched->max_stack) != 0) { + if (verbose) + error("Failed to resolve callchain. Skipping\n"); + + return false; + } + callchain_cursor_commit(cursor); + return false; +} + +/* + * Track idle stats per cpu by maintaining a local thread + * struct for the idle task on each cpu. 
+ */ +static int init_idle_threads(int ncpu) +{ + int i; + + idle_threads = zalloc(ncpu * sizeof(struct thread *)); + if (!idle_threads) + return -ENOMEM; + + idle_max_cpu = ncpu - 1; + + /* allocate the actual thread struct if needed */ + for (i = 0; i < ncpu; ++i) { + idle_threads[i] = thread__new(0, 0); + if (idle_threads[i] == NULL) + return -ENOMEM; + + thread__set_comm(idle_threads[i], idle_comm, 0); + } + + return 0; +} + +static void free_idle_threads(void) +{ + int i; + + if (idle_threads == NULL) + return; + + for (i = 0; i <= idle_max_cpu; ++i) { + if ((idle_threads[i])) + thread__delete(idle_threads[i]); + } + + free(idle_threads); +} + +static struct thread *get_idle_thread(int cpu) +{ + /* + * expand/allocate array of pointers to local thread + * structs if needed + */ + if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { + int i, j = __roundup_pow_of_two(cpu+1); + void *p; + + p = realloc(idle_threads, j * sizeof(struct thread *)); + if (!p) + return NULL; + + idle_threads = (struct thread **) p; + i = idle_max_cpu ? 
idle_max_cpu + 1 : 0; + for (; i < j; ++i) + idle_threads[i] = NULL; + + idle_max_cpu = j; + } + + /* allocate a new thread struct if needed */ + if (idle_threads[cpu] == NULL) { + idle_threads[cpu] = thread__new(0, 0); + + if (idle_threads[cpu]) { + idle_threads[cpu]->tid = 0; + thread__set_comm(idle_threads[cpu], idle_comm, 0); + } + } + + return idle_threads[cpu]; +} + +/* + * handle runtime stats saved per thread + */ +static struct thread_runtime *thread__init_runtime(struct thread *thread) +{ + struct thread_runtime *r; + + r = zalloc(sizeof(struct thread_runtime)); + if (!r) + return NULL; + + init_stats(&r->run_stats); + thread__set_priv(thread, r); + + return r; +} + +static struct thread_runtime *thread__get_runtime(struct thread *thread) +{ + struct thread_runtime *tr; + + tr = thread__priv(thread); + if (tr == NULL) { + tr = thread__init_runtime(thread); + if (tr == NULL) + pr_debug("Failed to malloc memory for runtime data.\n"); + } + + return tr; +} + +static struct thread *timehist_get_thread(struct perf_sched *sched, + struct perf_sample *sample, + struct machine *machine, + struct perf_evsel *evsel) +{ + struct thread *thread; + + if (is_idle_sample(sched, sample, evsel, machine)) { + thread = get_idle_thread(sample->cpu); + if (thread == NULL) + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + + } else { + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) { + pr_debug("Failed to get thread for tid %d. 
skipping sample.\n", + sample->tid); + } + } + + return thread; +} + +static bool timehist_skip_sample(struct perf_sched *sched, + struct thread *thread) +{ + bool rc = false; + + if (thread__is_filtered(thread)) { + rc = true; + sched->skipped_samples++; + } + + return rc; +} + +static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_sample *sample, + struct machine *machine, + struct thread *awakened) +{ + struct thread *thread; + char tstr[64]; + + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) + return; + + /* show wakeup unless both awakee and awaker are filtered */ + if (timehist_skip_sample(sched, thread) && + timehist_skip_sample(sched, awakened)) { + return; + } + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s [%04d] ", tstr, sample->cpu); + if (sched->show_cpu_visual) + printf(" %*s ", sched->max_cpu + 1, ""); + + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); + + /* dt spacer */ + printf(" %9s %9s %9s ", "", "", ""); + + printf("awakened: %s", timehist_get_commstr(awakened)); + + printf("\n"); +} + +static int timehist_sched_wakeup_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct thread *thread; + struct thread_runtime *tr = NULL; + /* want pid of awakened task not pid in sample */ + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + + thread = machine__findnew_thread(machine, 0, pid); + if (thread == NULL) + return -1; + + tr = thread__get_runtime(thread); + if (tr == NULL) + return -1; + + if (tr->ready_to_run == 0) + tr->ready_to_run = sample->time; + + /* show wakeups if requested */ + if (sched->show_wakeups) + timehist_print_wakeup_event(sched, sample, machine, thread); + + return 0; +} + +static int timehist_sched_change_event(struct 
perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct addr_location al; + struct thread *thread; + struct thread_runtime *tr = NULL; + u64 tprev; + int rc = 0; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event. skipping it\n", + event->header.type); + rc = -1; + goto out; + } + + thread = timehist_get_thread(sched, sample, machine, evsel); + if (thread == NULL) { + rc = -1; + goto out; + } + + if (timehist_skip_sample(sched, thread)) + goto out; + + tr = thread__get_runtime(thread); + if (tr == NULL) { + rc = -1; + goto out; + } + + tprev = perf_evsel__get_time(evsel, sample->cpu); + + timehist_update_runtime_stats(tr, sample->time, tprev); + if (!sched->summary_only) + timehist_print_sample(sched, sample, &al, thread); + +out: + if (tr) { + /* time of this sched_switch event becomes last time task seen */ + tr->last_time = sample->time; + + /* sched out event for task so reset ready to run time */ + tr->ready_to_run = 0; + } + + perf_evsel__save_time(evsel, sample->time, sample->cpu); + + return rc; +} + +static int timehist_sched_switch_event(struct perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + return timehist_sched_change_event(tool, event, evsel, sample, machine); +} + +static int process_lost(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + char tstr[64]; + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s ", tstr); + printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu); + + return 0; +} + + +static void print_thread_runtime(struct thread *t, + struct thread_runtime *r) +{ + double mean = avg_stats(&r->run_stats); + 
float stddev; + + printf("%*s %5d %9" PRIu64 " ", + comm_width, timehist_get_commstr(t), t->ppid, + (u64) r->run_stats.n); + + print_sched_time(r->total_run_time, 8); + stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); + print_sched_time(r->run_stats.min, 6); + printf(" "); + print_sched_time((u64) mean, 6); + printf(" "); + print_sched_time(r->run_stats.max, 6); + printf(" "); + printf("%5.2f", stddev); + printf("\n"); +} + +struct total_run_stats { + u64 sched_count; + u64 task_count; + u64 total_run_time; +}; + +static int __show_thread_runtime(struct thread *t, void *priv) +{ + struct total_run_stats *stats = priv; + struct thread_runtime *r; + + if (thread__is_filtered(t)) + return 0; + + r = thread__priv(t); + if (r && r->run_stats.n) { + stats->task_count++; + stats->sched_count += r->run_stats.n; + stats->total_run_time += r->total_run_time; + print_thread_runtime(t, r); + } + + return 0; +} + +static int show_thread_runtime(struct thread *t, void *priv) +{ + if (t->dead) + return 0; + + return __show_thread_runtime(t, priv); +} + +static int show_deadthread_runtime(struct thread *t, void *priv) +{ + if (!t->dead) + return 0; + + return __show_thread_runtime(t, priv); +} + +static void timehist_print_summary(struct perf_sched *sched, + struct perf_session *session) +{ + struct machine *m = &session->machines.host; + struct total_run_stats totals; + u64 task_count; + struct thread *t; + struct thread_runtime *r; + int i; + + memset(&totals, 0, sizeof(totals)); + + if (comm_width < 30) + comm_width = 30; + + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev\n"); + printf("%*s (count) ", comm_width, ""); + printf(" (msec) (msec) (msec) (msec) %%\n"); + printf("%.105s\n", graph_dotted_line); + + machine__for_each_thread(m, show_thread_runtime, &totals); + task_count = totals.task_count; + if (!task_count) + printf("<no still running tasks>\n"); + + 
printf("\nTerminated tasks:\n"); + machine__for_each_thread(m, show_deadthread_runtime, &totals); + if (task_count == totals.task_count) + printf("<no terminated tasks>\n"); + + /* CPU idle stats not tracked when samples were skipped */ + if (sched->skipped_samples) + return; + + printf("\nIdle stats:\n"); + for (i = 0; i <= idle_max_cpu; ++i) { + t = idle_threads[i]; + if (!t) + continue; + + r = thread__priv(t); + if (r && r->run_stats.n) { + totals.sched_count += r->run_stats.n; + printf(" CPU %2d idle for ", i); + print_sched_time(r->total_run_time, 6); + printf(" msec\n"); + } else + printf(" CPU %2d idle entire time window\n", i); + } + + printf("\n" + " Total number of unique tasks: %" PRIu64 "\n" + "Total number of context switches: %" PRIu64 "\n" + " Total run time (msec): ", + totals.task_count, totals.sched_count); + + print_sched_time(totals.total_run_time, 2); + printf("\n"); +} + +typedef int (*sched_handler)(struct perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine); + +static int perf_timehist__process_sample(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + int err = 0; + int this_cpu = sample->cpu; + + if (this_cpu > sched->max_cpu) + sched->max_cpu = this_cpu; + + if (evsel->handler != NULL) { + sched_handler f = evsel->handler; + + err = f(tool, event, evsel, sample, machine); + } + + return err; +} + +static int timehist_check_attr(struct perf_sched *sched, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct evsel_runtime *er; + + list_for_each_entry(evsel, &evlist->entries, node) { + er = perf_evsel__get_runtime(evsel); + if (er == NULL) { + pr_err("Failed to allocate memory for evsel runtime data\n"); + return -1; + } + + if (sched->show_callchain && + !(evsel->attr.sample_type & 
PERF_SAMPLE_CALLCHAIN)) { + pr_info("Samples do not have callchains.\n"); + sched->show_callchain = 0; + symbol_conf.use_callchain = 0; + } + } + + return 0; +} + +static int perf_sched__timehist(struct perf_sched *sched) +{ + const struct perf_evsel_str_handler handlers[] = { + { "sched:sched_switch", timehist_sched_switch_event, }, + { "sched:sched_wakeup", timehist_sched_wakeup_event, }, + { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, + }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; + + struct perf_session *session; + struct perf_evlist *evlist; + int err = -1; + + /* + * event handlers for timehist option + */ + sched->tool.sample = perf_timehist__process_sample; + sched->tool.mmap = perf_event__process_mmap; + sched->tool.comm = perf_event__process_comm; + sched->tool.exit = perf_event__process_exit; + sched->tool.fork = perf_event__process_fork; + sched->tool.lost = process_lost; + sched->tool.attr = perf_event__process_attr; + sched->tool.tracing_data = perf_event__process_tracing_data; + sched->tool.build_id = perf_event__process_build_id; + + sched->tool.ordered_events = true; + sched->tool.ordering_requires_timestamps = true; + + symbol_conf.use_callchain = sched->show_callchain; + + session = perf_session__new(&file, false, &sched->tool); + if (session == NULL) + return -ENOMEM; + + evlist = session->evlist; + + symbol__init(&session->header.env); + + if (timehist_check_attr(sched, evlist) != 0) + goto out; + + setup_pager(); + + /* setup per-evsel handlers */ + if (perf_session__set_tracepoints_handlers(session, handlers)) + goto out; + + if (!perf_session__has_traces(session, "record -R")) + goto out; + + /* pre-allocate struct for per-CPU idle stats */ + sched->max_cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu == 0) + sched->max_cpu = 4; + if (init_idle_threads(sched->max_cpu)) + goto out; + + /* summary_only implies summary option, but don't overwrite summary if set */ + if 
(sched->summary_only) + sched->summary = sched->summary_only; + + if (!sched->summary_only) + timehist_header(sched); + + err = perf_session__process_events(session); + if (err) { + pr_err("Failed to process events, error %d", err); + goto out; + } + + sched->nr_events = evlist->stats.nr_events[0]; + sched->nr_lost_events = evlist->stats.total_lost; + sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; + + if (sched->summary) + timehist_print_summary(sched, session); + +out: + free_idle_threads(); + perf_session__delete(session); + + return err; +} + + static void print_bad_events(struct perf_sched *sched) { if (sched->nr_unordered_timestamps && sched->nr_timestamps) { @@ -1957,6 +2823,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .next_shortname1 = 'A', .next_shortname2 = '0', .skip_merge = 0, + .show_callchain = 1, + .max_stack = 5, }; const struct option sched_options[] = { OPT_STRING('i', "input", &input_name, "file", @@ -1970,8 +2838,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) const struct option latency_options[] = { OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", "sort by key(s): runtime, switch, avg, max"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), OPT_INTEGER('C', "CPU", &sched.profile_cpu, "CPU to profile on"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, @@ -1983,8 +2849,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) const struct option replay_options[] = { OPT_UINTEGER('r', "repeat", &sched.replay_repeat, "repeat the workload replay N times (-1: infinite)"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), @@ -2001,6 +2865,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix 
__maybe_unused) "display given CPUs in map"), OPT_PARENT(sched_options) }; + const struct option timehist_options[] = { + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), + OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, + "Display call chains if present (default on)"), + OPT_UINTEGER(0, "max-stack", &sched.max_stack, + "Maximum number of functions to display backtrace."), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), + OPT_BOOLEAN('s', "summary", &sched.summary_only, + "Show only syscall summary with statistics"), + OPT_BOOLEAN('S', "with-summary", &sched.summary, + "Show all syscalls and summary with statistics"), + OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), + OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_PARENT(sched_options) + }; + const char * const latency_usage[] = { "perf sched latency [<options>]", NULL @@ -2013,8 +2897,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "perf sched map [<options>]", NULL }; + const char * const timehist_usage[] = { + "perf sched timehist [<options>]", + NULL + }; const char *const sched_subcommands[] = { "record", "latency", "map", - "replay", "script", NULL }; + "replay", "script", + "timehist", NULL }; const char *sched_usage[] = { NULL, NULL @@ -2077,6 +2966,21 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(replay_usage, replay_options); } return perf_sched__replay(&sched); + } else if (!strcmp(argv[0], "timehist")) { + if (argc) { + argc = parse_options(argc, argv, timehist_options, + timehist_usage, 0); + if (argc) + usage_with_options(timehist_usage, timehist_options); + } + if (sched.show_wakeups && sched.summary_only) { + pr_err(" Error: -s and -w are mutually 
exclusive.\n"); + parse_options_usage(timehist_usage, timehist_options, "s", true); + parse_options_usage(NULL, timehist_options, "w", true); + return -EINVAL; + } + + return perf_sched__timehist(&sched); } else { usage_with_options(sched_usage, sched_options); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fe3af9535e85..3df4178ba378 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -130,7 +130,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, 0); + err = symbol__disassemble(sym, map, NULL, 0); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4c18271c71c9..e6e9f7d80dbd 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1050,7 +1050,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, sizeof_bdl); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 42d319927762..8c9308ac30b7 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, 0); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aeb5a441bd74..095d90a9077f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -18,17 +18,61 @@ #include "annotate.h" #include "evsel.h" #include "block-range.h" 
+#include "arch/common.h" #include <regex.h> #include <pthread.h> #include <linux/bitops.h> +#include <sys/utsname.h> const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; -static struct ins *ins__find(const char *name); +static struct ins *ins__find(struct arch *arch, const char *name); static int disasm_line__parse(char *line, char **namep, char **rawp); +struct arch { + const char *name; + struct ins *instructions; + size_t nr_instructions; + bool sorted_instructions; + struct { + char comment_char; + char skip_functions_char; + } objdump; +}; + +static struct ins_ops call_ops; +static struct ins_ops dec_ops; +static struct ins_ops jump_ops; +static struct ins_ops mov_ops; +static struct ins_ops nop_ops; +static struct ins_ops lock_ops; +static struct ins_ops ret_ops; + +#include "arch/arm/annotate/instructions.c" +#include "arch/x86/annotate/instructions.c" + +static struct arch architectures[] = { + { + .name = "arm", + .instructions = arm__instructions, + .nr_instructions = ARRAY_SIZE(arm__instructions), + .objdump = { + .comment_char = ';', + .skip_functions_char = '+', + }, + }, + { + .name = "x86", + .instructions = x86__instructions, + .nr_instructions = ARRAY_SIZE(x86__instructions), + .objdump = { + .comment_char = '#', + }, + }, +}; + static void ins__delete(struct ins_operands *ops) { if (ops == NULL) @@ -54,7 +98,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, return ins__raw_scnprintf(ins, bf, size, ops); } -static int call__parse(struct ins_operands *ops, struct map *map) +static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { char *endptr, *tok, *name; @@ -66,10 +110,9 @@ static int call__parse(struct ins_operands *ops, struct map *map) name++; -#ifdef __arm__ - if (strchr(name, '+')) + if (arch->objdump.skip_functions_char && + strchr(name, arch->objdump.skip_functions_char)) return -1; -#endif tok = strchr(name, '>'); if (tok == NULL) @@ -118,7 +161,7 @@ bool 
ins__is_call(const struct ins *ins) return ins->ops == &call_ops; } -static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); @@ -173,7 +216,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; } -static int lock__parse(struct ins_operands *ops, struct map *map) +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { char *name; @@ -184,7 +227,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map) if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) goto out_free_ops; - ops->locked.ins = ins__find(name); + ops->locked.ins = ins__find(arch, name); free(name); if (ops->locked.ins == NULL) @@ -194,7 +237,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map) return 0; if (ops->locked.ins->ops->parse && - ops->locked.ins->ops->parse(ops->locked.ops, map) < 0) + ops->locked.ins->ops->parse(arch, ops->locked.ops, map) < 0) goto out_free_ops; return 0; @@ -237,7 +280,7 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; -static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -252,11 +295,7 @@ static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) return -1; target = ++s; -#ifdef __arm__ - comment = strchr(s, ';'); -#else - comment = strchr(s, '#'); -#endif + comment = strchr(s, arch->objdump.comment_char); if (comment != NULL) s = comment - 1; @@ -304,7 +343,7 @@ static struct ins_ops mov_ops = { .scnprintf = mov__scnprintf, }; -static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int dec__parse(struct arch *arch 
__maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { char *target, *comment, *s, prev; @@ -364,99 +403,6 @@ bool ins__is_ret(const struct ins *ins) return ins->ops == &ret_ops; } -static struct ins instructions[] = { - { .name = "add", .ops = &mov_ops, }, - { .name = "addl", .ops = &mov_ops, }, - { .name = "addq", .ops = &mov_ops, }, - { .name = "addw", .ops = &mov_ops, }, - { .name = "and", .ops = &mov_ops, }, -#ifdef __arm__ - { .name = "b", .ops = &jump_ops, }, // might also be a call - { .name = "bcc", .ops = &jump_ops, }, - { .name = "bcs", .ops = &jump_ops, }, - { .name = "beq", .ops = &jump_ops, }, - { .name = "bge", .ops = &jump_ops, }, - { .name = "bgt", .ops = &jump_ops, }, - { .name = "bhi", .ops = &jump_ops, }, - { .name = "bl", .ops = &call_ops, }, - { .name = "bls", .ops = &jump_ops, }, - { .name = "blt", .ops = &jump_ops, }, - { .name = "blx", .ops = &call_ops, }, - { .name = "bne", .ops = &jump_ops, }, -#endif - { .name = "bts", .ops = &mov_ops, }, - { .name = "call", .ops = &call_ops, }, - { .name = "callq", .ops = &call_ops, }, - { .name = "cmp", .ops = &mov_ops, }, - { .name = "cmpb", .ops = &mov_ops, }, - { .name = "cmpl", .ops = &mov_ops, }, - { .name = "cmpq", .ops = &mov_ops, }, - { .name = "cmpw", .ops = &mov_ops, }, - { .name = "cmpxch", .ops = &mov_ops, }, - { .name = "dec", .ops = &dec_ops, }, - { .name = "decl", .ops = &dec_ops, }, - { .name = "imul", .ops = &mov_ops, }, - { .name = "inc", .ops = &dec_ops, }, - { .name = "incl", .ops = &dec_ops, }, - { .name = "ja", .ops = &jump_ops, }, - { .name = "jae", .ops = &jump_ops, }, - { .name = "jb", .ops = &jump_ops, }, - { .name = "jbe", .ops = &jump_ops, }, - { .name = "jc", .ops = &jump_ops, }, - { .name = "jcxz", .ops = &jump_ops, }, - { .name = "je", .ops = &jump_ops, }, - { .name = "jecxz", .ops = &jump_ops, }, - { .name = "jg", .ops = &jump_ops, }, - { .name = "jge", .ops = &jump_ops, }, - { .name = "jl", .ops = &jump_ops, }, - { .name = "jle", .ops = &jump_ops, 
}, - { .name = "jmp", .ops = &jump_ops, }, - { .name = "jmpq", .ops = &jump_ops, }, - { .name = "jna", .ops = &jump_ops, }, - { .name = "jnae", .ops = &jump_ops, }, - { .name = "jnb", .ops = &jump_ops, }, - { .name = "jnbe", .ops = &jump_ops, }, - { .name = "jnc", .ops = &jump_ops, }, - { .name = "jne", .ops = &jump_ops, }, - { .name = "jng", .ops = &jump_ops, }, - { .name = "jnge", .ops = &jump_ops, }, - { .name = "jnl", .ops = &jump_ops, }, - { .name = "jnle", .ops = &jump_ops, }, - { .name = "jno", .ops = &jump_ops, }, - { .name = "jnp", .ops = &jump_ops, }, - { .name = "jns", .ops = &jump_ops, }, - { .name = "jnz", .ops = &jump_ops, }, - { .name = "jo", .ops = &jump_ops, }, - { .name = "jp", .ops = &jump_ops, }, - { .name = "jpe", .ops = &jump_ops, }, - { .name = "jpo", .ops = &jump_ops, }, - { .name = "jrcxz", .ops = &jump_ops, }, - { .name = "js", .ops = &jump_ops, }, - { .name = "jz", .ops = &jump_ops, }, - { .name = "lea", .ops = &mov_ops, }, - { .name = "lock", .ops = &lock_ops, }, - { .name = "mov", .ops = &mov_ops, }, - { .name = "movb", .ops = &mov_ops, }, - { .name = "movdqa",.ops = &mov_ops, }, - { .name = "movl", .ops = &mov_ops, }, - { .name = "movq", .ops = &mov_ops, }, - { .name = "movslq", .ops = &mov_ops, }, - { .name = "movzbl", .ops = &mov_ops, }, - { .name = "movzwl", .ops = &mov_ops, }, - { .name = "nop", .ops = &nop_ops, }, - { .name = "nopl", .ops = &nop_ops, }, - { .name = "nopw", .ops = &nop_ops, }, - { .name = "or", .ops = &mov_ops, }, - { .name = "orl", .ops = &mov_ops, }, - { .name = "test", .ops = &mov_ops, }, - { .name = "testb", .ops = &mov_ops, }, - { .name = "testl", .ops = &mov_ops, }, - { .name = "xadd", .ops = &mov_ops, }, - { .name = "xbeginl", .ops = &jump_ops, }, - { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, -}; - static int ins__key_cmp(const void *name, const void *insp) { const struct ins *ins = insp; @@ -472,24 +418,58 @@ static int ins__cmp(const void *a, const void *b) return 
strcmp(ia->name, ib->name); } -static void ins__sort(void) +static void ins__sort(struct arch *arch) { - const int nmemb = ARRAY_SIZE(instructions); + const int nmemb = arch->nr_instructions; - qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); + qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); } -static struct ins *ins__find(const char *name) +static struct ins *ins__find(struct arch *arch, const char *name) { - const int nmemb = ARRAY_SIZE(instructions); + const int nmemb = arch->nr_instructions; + + if (!arch->sorted_instructions) { + ins__sort(arch); + arch->sorted_instructions = true; + } + + return bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); +} + +static int arch__key_cmp(const void *name, const void *archp) +{ + const struct arch *arch = archp; + + return strcmp(name, arch->name); +} + +static int arch__cmp(const void *a, const void *b) +{ + const struct arch *aa = a; + const struct arch *ab = b; + + return strcmp(aa->name, ab->name); +} + +static void arch__sort(void) +{ + const int nmemb = ARRAY_SIZE(architectures); + + qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); +} + +static struct arch *arch__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(architectures); static bool sorted; if (!sorted) { - ins__sort(); + arch__sort(); sorted = true; } - return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); + return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } int symbol__alloc_hist(struct symbol *sym) @@ -709,9 +689,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); } -static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) +static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) { - dl->ins = ins__find(dl->name); + dl->ins = ins__find(arch, dl->name); if (dl->ins == NULL) return; @@ 
-719,7 +699,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) if (!dl->ins->ops) return; - if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0) + if (dl->ins->ops->parse && dl->ins->ops->parse(arch, &dl->ops, map) < 0) dl->ins = NULL; } @@ -762,6 +742,7 @@ out_free_name: static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize, int line_nr, + struct arch *arch, struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); @@ -777,7 +758,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, map); + disasm_line__init_ins(dl, arch, map); } } @@ -1087,6 +1068,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, + struct arch *arch, FILE *file, size_t privsize, int *line_nr) { @@ -1149,7 +1131,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map); + dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); free(line); (*line_nr)++; @@ -1280,10 +1262,23 @@ fallback: return 0; } -int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) +static const char *annotate__norm_arch(const char *arch_name) +{ + struct utsname uts; + + if (!arch_name) { /* Assume we are annotating locally. 
*/ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } + return normalize_arch((char *)arch_name); +} + +int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; + struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1297,6 +1292,14 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) if (err) return err; + arch_name = annotate__norm_arch(arch_name); + if (!arch_name) + return -1; + + arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1395,7 +1398,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) nline = 0; while (!feof(file)) { - if (symbol__parse_objdump_line(sym, map, file, privsize, + if (symbol__parse_objdump_line(sym, map, arch, file, privsize, &lineno) < 0) break; nline++; @@ -1793,7 +1796,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, 0) < 0) + if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 5bbcec173b82..8e490b5c91bc 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,9 +34,11 @@ struct ins_operands { }; }; +struct arch; + struct ins_ops { void (*free)(struct ins_operands *ops); - int (*parse)(struct ins_operands *ops, struct map *map); + int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map); int (*scnprintf)(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); }; @@ -156,7 +158,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int 
evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e58a2fbf3b16..b2365a63db45 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1481,7 +1481,7 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, @@ -1490,11 +1490,13 @@ retry_open: group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; - pr_debug2("sys_perf_event_open failed, error %d\n", + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } + pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + if (evsel->bpf_fd >= 0) { int evt_fd = FD(evsel, cpu, thread); int bpf_fd = evsel->bpf_fd; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8cd7cd227483..27fa3a343577 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -391,6 +391,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, #define EVSEL__PRINT_ONELINE (1<<4) #define EVSEL__PRINT_SRCLINE (1<<5) #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) +#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) struct callchain_cursor; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 662a0a6182e7..53bb614feafb 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -108,7 +108,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, int print_oneline = print_opts 
& EVSEL__PRINT_ONELINE; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW; char s = print_oneline ? ' ' : '\t'; + bool first = true; if (sample->callchain) { struct addr_location node_al; @@ -124,6 +126,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + if (print_arrow && !first) + printed += fprintf(fp, " <-"); + if (print_ip) printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); @@ -137,7 +142,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_symoffset) { printed += __symbol__fprintf_symname_offs(node->sym, &node_al, - print_unknown_as_addr, fp); + print_unknown_as_addr, + true, fp); } else { printed += __symbol__fprintf_symname(node->sym, &node_al, print_unknown_as_addr, fp); @@ -157,6 +163,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += fprintf(fp, "\n"); callchain_cursor_advance(cursor); + first = false; } } @@ -188,7 +195,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, printed += fprintf(fp, " "); if (print_symoffset) { printed += __symbol__fprintf_symname_offs(al->sym, al, - print_unknown_as_addr, fp); + print_unknown_as_addr, + true, fp); } else { printed += __symbol__fprintf_symname(al->sym, al, print_unknown_as_addr, fp); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index e50773286ef6..1d4ab53c60ca 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -280,6 +280,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) u64 lock = data_src->mem_lock; int err = 0; +#define HITM_INC(__f) \ +do { \ + stats->__f++; \ + stats->tot_hitm++; \ +} while (0) + #define P(a, b) PERF_MEM_##a##_##b stats->nr_entries++; @@ -303,7 +309,7 @@ int 
c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) if (lvl & P(LVL, L2 )) stats->ld_l2hit++; if (lvl & P(LVL, L3 )) { if (snoop & P(SNOOP, HITM)) - stats->lcl_hitm++; + HITM_INC(lcl_hitm); else stats->ld_llchit++; } @@ -331,7 +337,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) if (snoop & P(SNOOP, HIT)) stats->rmt_hit++; else if (snoop & P(SNOOP, HITM)) - stats->rmt_hitm++; + HITM_INC(rmt_hitm); } if ((lvl & P(LVL, MISS))) @@ -364,6 +370,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) } #undef P +#undef HITM_INC return err; } @@ -390,6 +397,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->ld_llchit += add->ld_llchit; stats->lcl_hitm += add->lcl_hitm; stats->rmt_hitm += add->rmt_hitm; + stats->tot_hitm += add->tot_hitm; stats->rmt_hit += add->rmt_hit; stats->lcl_dram += add->lcl_dram; stats->rmt_dram += add->rmt_dram; diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index faf80403b519..40f72ee4f42a 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -59,6 +59,7 @@ struct c2c_stats { u32 ld_llchit; /* count of loads that hit LLC */ u32 lcl_hitm; /* count of loads with local HITM */ u32 rmt_hitm; /* count of loads with remote HITM */ + u32 tot_hitm; /* count of loads with local and remote HITM */ u32 rmt_hit; /* count of loads with remote hit clean; */ u32 lcl_dram; /* count of loads miss to local DRAM */ u32 rmt_dram; /* count of loads miss to remote DRAM */ diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2d0a905c879a..dec7e2d44885 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -282,7 +282,8 @@ int symbol__annotation_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, - bool unknown_as_addr, FILE *fp); + bool unknown_as_addr, + bool print_offsets, 
FILE *fp); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t __symbol__fprintf_symname(const struct symbol *sym, diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index a680bdaa65dc..7c6b33e8e2d2 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp) size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, - bool unknown_as_addr, FILE *fp) + bool unknown_as_addr, + bool print_offsets, FILE *fp) { unsigned long offset; size_t length; if (sym && sym->name) { length = fprintf(fp, "%s", sym->name); - if (al) { + if (al && print_offsets) { if (al->addr < sym->end) offset = al->addr - sym->start; else @@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, al, false, fp); + return __symbol__fprintf_symname_offs(sym, al, false, true, fp); } size_t __symbol__fprintf_symname(const struct symbol *sym, const struct addr_location *al, bool unknown_as_addr, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); + return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp); } size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, NULL, false, fp); + return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp); } size_t dso__fprintf_symbols_by_name(struct dso *dso, |