diff options
33 files changed, 513 insertions, 110 deletions
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d97f84c080da..022d0acf7df0 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -330,7 +330,8 @@ struct perf_event_attr { mmap2 : 1, /* include mmap with inode data */ comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ - __reserved_1 : 38; + context_switch : 1, /* context switch data */ + __reserved_1 : 37; union { __u32 wakeup_events; /* wakeup every n events */ @@ -572,9 +573,11 @@ struct perf_event_mmap_page { /* * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on * different events so can reuse the same bit position. + * Ditto PERF_RECORD_MISC_SWITCH_OUT. */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also @@ -818,6 +821,32 @@ enum perf_event_type { */ PERF_RECORD_LOST_SAMPLES = 13, + /* + * Records a context switch in or out (flagged by + * PERF_RECORD_MISC_SWITCH_OUT). See also + * PERF_RECORD_SWITCH_CPU_WIDE. + * + * struct { + * struct perf_event_header header; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH = 14, + + /* + * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and + * next_prev_tid that are the next (switching out) or previous + * (switching in) pid/tid. + * + * struct { + * struct perf_event_header header; + * u32 next_prev_pid; + * u32 next_prev_tid; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH_CPU_WIDE = 15, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index d3dae3419b99..ce21143c0d9e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; +static atomic_t nr_switch_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev, local_irq_restore(flags); } +static void perf_event_switch(struct task_struct *task, + struct task_struct *next_prev, bool sched_in); + #define for_each_task_context_nr(ctxn) \ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) @@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task, if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(task, next, false); + if (atomic_read(&nr_switch_events)) + perf_event_switch(task, next, false); + for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); @@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); + if (atomic_read(&nr_switch_events)) + perf_event_switch(task, prev, true); + if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(prev, task, true); } @@ -3454,6 +3464,10 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_task_events); if (event->attr.freq) atomic_dec(&nr_freq_events); + if (event->attr.context_switch) { + static_key_slow_dec_deferred(&perf_sched_events); + atomic_dec(&nr_switch_events); + } if (is_cgroup_event(event)) static_key_slow_dec_deferred(&perf_sched_events); if (has_branch_stack(event)) @@ -5982,6 +5996,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) } /* + * context_switch tracking + */ + +struct perf_switch_event { + struct task_struct *task; + struct task_struct *next_prev; + + struct { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; + } event_id; +}; + +static int perf_event_switch_match(struct perf_event *event) +{ + return event->attr.context_switch; +} + +static void perf_event_switch_output(struct perf_event *event, void *data) +{ + struct perf_switch_event *se = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_switch_match(event)) + return; + + /* Only CPU-wide events are allowed to see next/prev pid/tid */ + if (event->ctx->task) { + se->event_id.header.type = PERF_RECORD_SWITCH; + se->event_id.header.size = sizeof(se->event_id.header); + } else { + se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE; + se->event_id.header.size = sizeof(se->event_id); + se->event_id.next_prev_pid = + perf_event_pid(event, se->next_prev); + se->event_id.next_prev_tid = + perf_event_tid(event, se->next_prev); + } + + perf_event_header__init_id(&se->event_id.header, &sample, event); + + ret = perf_output_begin(&handle, event, se->event_id.header.size); + if (ret) + return; + + if (event->ctx->task) + perf_output_put(&handle, se->event_id.header); + else + perf_output_put(&handle, se->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_event_switch(struct task_struct *task, + struct task_struct *next_prev, bool sched_in) +{ + struct perf_switch_event switch_event; + + /* N.B. caller checks nr_switch_events != 0 */ + + switch_event = (struct perf_switch_event){ + .task = task, + .next_prev = next_prev, + .event_id = { + .header = { + /* .type */ + .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT, + /* .size */ + }, + /* .next_prev_pid */ + /* .next_prev_tid */ + }, + }; + + perf_event_aux(perf_event_switch_output, + &switch_event, + NULL); +} + +/* * IRQ throttle logging */ @@ -7479,6 +7578,10 @@ static void account_event(struct perf_event *event) if (atomic_inc_return(&nr_freq_events) == 1) tick_nohz_full_kick_all(); } + if (event->attr.context_switch) { + atomic_inc(&nr_switch_events); + static_key_slow_inc(&perf_sched_events.key); + } if (has_branch_stack(event)) static_key_slow_inc(&perf_sched_events.key); if (is_cgroup_event(event)) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cc25f059ab3d..fcd8a9e3d2e1 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -418,7 +418,7 @@ static int func_map_init(struct pevent *pevent) } static struct func_map * -find_func(struct pevent *pevent, unsigned long long addr) +__find_func(struct pevent *pevent, unsigned long long addr) { struct func_map *func; struct func_map key; @@ -434,6 +434,71 @@ find_func(struct pevent *pevent, unsigned long long addr) return func; } +struct func_resolver { + pevent_func_resolver_t *func; + void *priv; + struct func_map map; +}; + +/** + * pevent_set_function_resolver - set an alternative function resolver + * @pevent: handle for the pevent + * @resolver: function to be used + * @priv: resolver function private state. + * + * Some tools may have already a way to resolve kernel functions, allow them to + * keep using it instead of duplicating all the entries inside + * pevent->funclist. + */ +int pevent_set_function_resolver(struct pevent *pevent, + pevent_func_resolver_t *func, void *priv) +{ + struct func_resolver *resolver = malloc(sizeof(*resolver)); + + if (resolver == NULL) + return -1; + + resolver->func = func; + resolver->priv = priv; + + free(pevent->func_resolver); + pevent->func_resolver = resolver; + + return 0; +} + +/** + * pevent_reset_function_resolver - reset alternative function resolver + * @pevent: handle for the pevent + * + * Stop using whatever alternative resolver was set, use the default + * one instead. + */ +void pevent_reset_function_resolver(struct pevent *pevent) +{ + free(pevent->func_resolver); + pevent->func_resolver = NULL; +} + +static struct func_map * +find_func(struct pevent *pevent, unsigned long long addr) +{ + struct func_map *map; + + if (!pevent->func_resolver) + return __find_func(pevent, addr); + + map = &pevent->func_resolver->map; + map->mod = NULL; + map->addr = addr; + map->func = pevent->func_resolver->func(pevent->func_resolver->priv, + &map->addr, &map->mod); + if (map->func == NULL) + return NULL; + + return map; +} + /** * pevent_find_function - find a function by a given address * @pevent: handle for the pevent @@ -6564,6 +6629,7 @@ void pevent_free(struct pevent *pevent) free(pevent->trace_clock); free(pevent->events); free(pevent->sort_events); + free(pevent->func_resolver); free(pevent); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 063b1971eb35..204befb05a17 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -453,6 +453,10 @@ struct cmdline_list; struct func_map; struct func_list; struct event_handler; +struct func_resolver; + +typedef char *(pevent_func_resolver_t)(void *priv, + unsigned long long *addrp, char **modp); struct pevent { int ref_count; @@ -481,6 +485,7 @@ struct pevent { int cmdline_count; struct func_map *func_map; + struct func_resolver *func_resolver; struct func_list *funclist; unsigned int func_count; @@ -611,6 +616,9 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, }; +int pevent_set_function_resolver(struct pevent *pevent, + pevent_func_resolver_t *func, void *priv); +void pevent_reset_function_resolver(struct pevent *pevent); int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 29e5307945bf..63ee0408761d 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -293,6 +293,10 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. This option sets the time out limit. The default value is 500 ms. +--switch-events:: +Record context switch events i.e. events of type PERF_RECORD_SWITCH or +PERF_RECORD_SWITCH_CPU_WIDE. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c82df572fac2..e2fec5fc21e7 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -222,6 +222,10 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-switch-events + Display context switch events i.e. events of type PERF_RECORD_SWITCH or + PERF_RECORD_SWITCH_CPU_WIDE. + --header Show perf.data header. diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 01b06492bd6a..f62c49b35be0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -561,6 +561,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, + .context_switch = perf_event__repipe, .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1932e27c00d8..445a64d19625 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1075,6 +1075,8 @@ struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, + "Record context switch events"), OPT_END() }; @@ -1102,6 +1104,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) " system-wide mode\n"); usage_with_options(record_usage, record_options); } + if (rec->opts.record_switch_events && + !perf_can_record_switch_events()) { + ui__error("kernel does not support recording context switch events (--switch-events option)\n"); + usage_with_options(record_usage, record_options); + } if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24809787369f..bd31380122f9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -623,6 +623,7 @@ struct perf_script { struct perf_session *session; bool show_task_events; bool show_mmap_events; + bool show_switch_events; }; static int process_attr(struct perf_tool *tool, union perf_event *event, @@ -661,7 +662,7 @@ static int process_comm_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); int ret = -1; thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); @@ -695,7 +696,7 @@ static int process_fork_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_fork(tool, event, sample, machine) < 0) return -1; @@ -727,7 +728,7 @@ static int process_exit_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); if (thread == NULL) { @@ -759,7 +760,7 @@ static int process_mmap_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_mmap(tool, event, sample, machine) < 0) return -1; @@ -790,7 +791,7 @@ static int process_mmap2_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_mmap2(tool, event, sample, machine) < 0) return -1; @@ -813,6 +814,32 @@ static int process_mmap2_event(struct perf_tool *tool, return 0; } +static int process_switch_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + + if (perf_event__process_switch(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) { + pr_debug("problem processing SWITCH event, skipping it.\n"); + return -1; + } + + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -834,6 +861,8 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } + if (script->show_switch_events) + script->tool.context_switch = process_switch_event; ret = perf_session__process_events(script->session); @@ -1618,6 +1647,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, + "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", @@ -1830,6 +1861,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) else symbol_conf.use_callchain = false; + if (pevent_set_function_resolver(session->tevent.pevent, + machine__resolve_kernel_addr, + &session->machines.host) < 0) { + pr_err("%s: failed to set libtraceevent function resolver\n", __func__); + return -1; + } + if (generate_script_lang) { struct stat perf_stat; int input; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 32b4d280af4f..282841b10f24 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1489,6 +1489,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; + if (trace_event__register_resolver(trace->host) < 0) + return -errno; + err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 937b16aa0300..cf459f89fc9b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -57,6 +57,7 @@ struct record_opts { bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; + bool record_switch_events; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 5acf000939ea..138a0e3431fa 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -20,6 +20,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "perf")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -33,6 +35,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); return 0; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2fe98bb0e95b..c73276db6d6f 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -137,6 +137,10 @@ struct dso { struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct { + u64 addr; + struct symbol *symbol; + } last_find_result[MAP__NR_TYPES]; void *a2l; char *symsrc_filename; unsigned int a2l_fails; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 67a977e5d0ab..7ff61274ed57 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -26,6 +26,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_AUX] = "AUX", [PERF_RECORD_ITRACE_START] = "ITRACE_START", [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", + [PERF_RECORD_SWITCH] = "SWITCH", + [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } +int perf_event__process_switch(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_switch_event(machine, event); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + const char *in_out = out ? "OUT" : "IN "; + + if (event->header.type == PERF_RECORD_SWITCH) + return fprintf(fp, " %s\n", in_out); + + return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + in_out, out ? "next" : "prev", + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_ITRACE_START: ret += perf_event__fprintf_itrace_start(event, fp); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret += perf_event__fprintf_switch(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c53f36384b64..4bb2ae894c78 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -348,6 +348,12 @@ struct itrace_start_event { u32 pid, tid; }; +struct context_switch_event { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -369,6 +375,7 @@ union perf_event { struct auxtrace_error_event auxtrace_error; struct aux_event aux; struct itrace_start_event itrace_start; + struct context_switch_event context_switch; }; void perf_event__print_totals(void); @@ -418,6 +425,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_switch(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -480,6 +491,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f7d9c77ee31b..3b9f411a6b46 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1102,7 +1102,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, } static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - struct target *target) + bool has_user_cpus) { struct perf_evsel *evsel; @@ -1111,15 +1111,16 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (evsel->cpus && target->cpu_list) + if (evsel->cpus && has_user_cpus) cpu_map__put(evsel->cpus); - if (!evsel->cpus || target->cpu_list) + if (!evsel->cpus || has_user_cpus) evsel->cpus = cpu_map__get(evlist->cpus); evsel->threads = thread_map__get(evlist->threads); - if (!evsel->cpus || !evsel->threads) + if ((evlist->cpus && !evsel->cpus) || + (evlist->threads && !evsel->threads)) return -ENOMEM; } @@ -1142,7 +1143,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, target); + return perf_evlist__propagate_maps(evlist, !!target->cpu_list); out_delete_threads: thread_map__put(evlist->threads); @@ -1150,6 +1151,23 @@ out_delete_threads: return -1; } +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads) +{ + if (evlist->cpus) + cpu_map__put(evlist->cpus); + + evlist->cpus = cpus; + + if (evlist->threads) + thread_map__put(evlist->threads); + + evlist->threads = threads; + + return perf_evlist__propagate_maps(evlist, false); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 037633c1da9d..a8930b68456b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -114,6 +114,7 @@ void perf_evlist__close(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); +bool perf_can_record_switch_events(void); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); int record_opts__config(struct record_opts *opts); @@ -152,14 +153,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -static inline void perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) -{ - evlist->cpus = cpus; - evlist->threads = threads; -} - +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 49fb7b5feb09..71f6905c7cb9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -738,6 +738,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_switch_events) + attr->context_switch = track; + if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); @@ -1127,6 +1130,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(mmap2, p_unsigned); PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 03ace57a800c..179b2bdd157d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { int nr, i; - char *str; nr = ph->env.nr_cmdline; - str = ph->env.cmdline; fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) { - fprintf(fp, "%s ", str); - str += strlen(str) + 1; - } + for (i = 0; i < nr; i++) + fprintf(fp, "%s ", ph->env.cmdline_argv[i]); fputc('\n', fp); } @@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section __maybe_unused, +static int process_cmdline(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { ssize_t ret; - char *str; - u32 nr, i; - struct strbuf sb; + char *str, *cmdline = NULL, **argv = NULL; + u32 nr, i, len = 0; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, nr = bswap_32(nr); ph->env.nr_cmdline = nr; - strbuf_init(&sb, 128); + + cmdline = zalloc(section->size + nr + 1); + if (!cmdline) + return -1; + + argv = zalloc(sizeof(char *) * (nr + 1)); + if (!argv) + goto error; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); if (!str) goto error; - /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + argv[i] = cmdline + len; + memcpy(argv[i], str, strlen(str) + 1); + len += strlen(str) + 1; free(str); } - ph->env.cmdline = strbuf_detach(&sb, NULL); + ph->env.cmdline = cmdline; + ph->env.cmdline_argv = (const char **) argv; return 0; error: - strbuf_release(&sb); + free(argv); + free(cmdline); return -1; } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d57962c591..9b53b6525ce8 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -84,6 +84,7 @@ struct perf_session_env { int nr_pmu_mappings; int nr_groups; char *cmdline; + const char **cmdline_argv; char *sibling_cores; char *sibling_threads; char *numa_nodes; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d0bf1e590479..be3e00891d22 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -550,6 +550,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_switch(event, stdout); + return 0; +} + struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename) { @@ -1451,6 +1459,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_itrace_start_event(machine, event); break; case PERF_RECORD_LOST_SAMPLES: ret = machine__process_lost_samples_event(machine, event, sample); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret = machine__process_switch_event(machine, event); break; default: ret = -1; break; @@ -1993,3 +2004,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(&machine->dsos, filename); } + +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) +{ + struct machine *machine = vmachine; + struct map *map; + struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL); + + if (sym == NULL) + return NULL; + + *modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL; + *addrp = map->unmap_ip(map, sym->start); + return sym->name; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 887798e511e9..ea5cb4a621db 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -87,6 +87,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, @@ -237,5 +239,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); +/* + * For use with libtraceevent's pevent_set_function_resolver() + */ +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b5a5e9c02437..ce37e95bc513 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } +/* + * Use this and __map__is_kmodule() for map instances that are in + * machine->kmaps, and thus have map->groups->machine all properly set, to + * disambiguate between the kernel and modules. + * + * When the need arises, introduce map__is_{kernel,kmodule)() that + * checks (map->groups != NULL && map->groups->machine != NULL && + * map->dso->kernel) before calling __map__is_{kernel,kmodule}()) + */ +bool __map__is_kernel(const struct map *map) +{ + return map->groups->machine->vmlinux_maps[map->type] == map; +} + static void map__exit(struct map *map) { BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index d73e687b224e..57829e89b78b 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -256,4 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name); +bool __map__is_kernel(const struct map *map); + +static inline bool __map__is_kmodule(const struct map *map) +{ + return !__map__is_kernel(map); +} + #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 1f7becbe5e18..0d228a29526d 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel) evsel->attr.comm_exec = 1; } +static void perf_probe_context_switch(struct perf_evsel *evsel) +{ + evsel->attr.context_switch = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -95,6 +100,11 @@ static bool perf_can_comm_exec(void) return perf_probe_api(perf_probe_comm_exec); } +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) { struct perf_evsel *evsel; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ed9dc2555ec7..2d9574710543 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -180,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env) zfree(&env->cpuid); zfree(&env->cmdline); + zfree(&env->cmdline_argv); zfree(&env->sibling_cores); zfree(&env->sibling_threads); zfree(&env->numa_nodes); @@ -332,6 +333,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->aux = perf_event__process_aux; if (tool->itrace_start == NULL) tool->itrace_start = perf_event__process_itrace_start; + if (tool->context_switch == NULL) + tool->context_switch = perf_event__process_switch; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -470,6 +473,19 @@ static void perf_event__itrace_start_swap(union perf_event *event, swap_sample_id_all(event, &event->itrace_start + 1); } +static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) +{ + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + event->context_switch.next_prev_pid = + bswap_32(event->context_switch.next_prev_pid); + event->context_switch.next_prev_tid = + bswap_32(event->context_switch.next_prev_tid); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->context_switch + 1); +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -632,6 +648,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_AUX] = perf_event__aux_swap, [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, + [PERF_RECORD_SWITCH] = perf_event__switch_swap, + [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1093,6 +1111,9 @@ static int machines__deliver_event(struct machines *machines, return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + return tool->context_switch(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 45c2e3a8316b..725640fd7cd8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -444,7 +444,12 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { - return symbols__find(&dso->symbols[type], addr); + if (dso->last_find_result[type].addr != addr) { + dso->last_find_result[type].addr = addr; + dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); + } + + return dso->last_find_result[type].symbol; } struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c307dd438286..cab8cc24831b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + context_switch, throttle, unthrottle; event_attr_op attr; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index eb72716017ac..22245986e59e 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -341,20 +341,14 @@ out: static int record_proc_kallsyms(void) { - unsigned int size; - const char *path = "/proc/kallsyms"; - struct stat st; - int ret, err = 0; - - ret = stat(path, &st); - if (ret < 0) { - /* not found */ - size = 0; - if (write(output_fd, &size, 4) != 4) - err = -EIO; - return err; - } - return record_file(path, 4); + unsigned long long size = 0; + /* + * Just to keep older perf.data file parsers happy, record a zero + * sized kallsyms file, i.e. do the same thing that was done when + * /proc/kallsyms (or something specified via --kallsyms, in a + * different path) couldn't be read. + */ + return write(output_fd, &size, 4) != 4 ? -EIO : 0; } static int record_ftrace_printk(void) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index d4957418657e..8ff7d620d942 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -135,36 +135,6 @@ void event_format__print(struct event_format *event, return event_format__fprintf(event, cpu, data, size, stdout); } -void parse_proc_kallsyms(struct pevent *pevent, - char *file, unsigned int size __maybe_unused) -{ - unsigned long long addr; - char *func; - char *line; - char *next = NULL; - char *addr_str; - char *mod; - char *fmt = NULL; - - line = strtok_r(file, "\n", &next); - while (line) { - mod = NULL; - addr_str = strtok_r(line, " ", &fmt); - addr = strtoull(addr_str, NULL, 16); - /* skip character */ - strtok_r(NULL, " ", &fmt); - func = strtok_r(NULL, "\t", &fmt); - mod = strtok_r(NULL, "]", &fmt); - /* truncate the extra '[' */ - if (mod) - mod = mod + 1; - - pevent_register_function(pevent, func, addr, mod); - - line = strtok_r(NULL, "\n", &next); - } -} - void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 54d9e9b548a8..b67a0ccf5ab9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -162,25 +162,23 @@ out: static int read_proc_kallsyms(struct pevent *pevent) { unsigned int size; - char *buf; size = read4(pevent); if (!size) return 0; - - buf = malloc(size + 1); - if (buf == NULL) - return -1; - - if (do_read(buf, size) < 0) { - free(buf); - return -1; - } - buf[size] = '\0'; - - parse_proc_kallsyms(pevent, buf, size); - - free(buf); + /* + * Just skip it, now that we configure libtraceevent to use the + * tools/perf/ symbol resolver. + * + * We need to skip it so that we can continue parsing old perf.data + * files, that contains this /proc/kallsyms payload. + * + * Newer perf.data files will have just the 4-bytes zeros "kallsyms + * payload", so that older tools can continue reading it and interpret + * it as "no kallsyms payload is present". + */ + lseek(input_fd, size, SEEK_CUR); + trace_data_size += size; return 0; } diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 6322d37164c5..667bd109d16f 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <traceevent/event-parse.h> #include "trace-event.h" +#include "machine.h" #include "util.h" /* @@ -19,6 +20,7 @@ * there. */ static struct trace_event tevent; +static bool tevent_initialized; int trace_event__init(struct trace_event *t) { @@ -32,6 +34,32 @@ int trace_event__init(struct trace_event *t) return pevent ? 0 : -1; } +static int trace_event__init2(void) +{ + int be = traceevent_host_bigendian(); + struct pevent *pevent; + + if (trace_event__init(&tevent)) + return -1; + + pevent = tevent.pevent; + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, be); + pevent_set_host_bigendian(pevent, be); + tevent_initialized = true; + return 0; +} + +int trace_event__register_resolver(struct machine *machine) +{ + if (!tevent_initialized && trace_event__init2()) + return -1; + + return pevent_set_function_resolver(tevent.pevent, + machine__resolve_kernel_addr, + machine); +} + void trace_event__cleanup(struct trace_event *t) { traceevent_unload_plugins(t->plugin_list, t->pevent); @@ -62,21 +90,8 @@ tp_format(const char *sys, const char *name) struct event_format* trace_event__tp_format(const char *sys, const char *name) { - static bool initialized; - - if (!initialized) { - int be = traceevent_host_bigendian(); - struct pevent *pevent; - - if (trace_event__init(&tevent)) - return NULL; - - pevent = tevent.pevent; - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, be); - pevent_set_host_bigendian(pevent, be); - initialized = true; - } + if (!tevent_initialized && trace_event__init2()) + return NULL; return tp_format(sys, name); } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d5168f0be4ec..568128c3284a 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -18,6 +18,7 @@ struct trace_event { int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); +int trace_event__register_resolver(struct machine *machine); struct event_format* trace_event__tp_format(const char *sys, const char *name); |