From 0d37aa34f8806bb443dd3c8621fd9bdbb50c58bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Jan 2012 14:08:15 -0200 Subject: perf tools: Introduce per user view The new --uid command line option will show only the tasks for a given user, using the proc interface to figure out the existing tasks. Kernel work is needed to close races at startup, but this should already be useful in many use cases. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-bdnspm000gw2l984a2t53o8z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 64f8bee31ced..92af1688bae4 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -188,6 +188,7 @@ void pthread__unblock_sigwinch(void); struct perf_record_opts { pid_t target_pid; pid_t target_tid; + uid_t uid; bool call_graph; bool group; bool inherit_stat; -- cgit v1.2.3 From b52956c961be3a04182ae7b776623531601e0fb7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 8 Feb 2012 09:32:52 -0700 Subject: perf tools: Allow multiple threads or processes in record, stat, top Allow a user to collect events for multiple threads or processes using a comma separated list. e.g., collect data on a VM and its vhost thread: perf top -p 21483,21485 perf stat -p 21483,21485 -ddd perf record -p 21483,21485 or monitoring vcpu threads perf top -t 21488,21489 perf stat -t 21488,21489 -ddd perf record -t 21488,21489 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1328718772-16688-1-git-send-email-dsahern@gmail.com Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 +- tools/perf/Documentation/perf-stat.txt | 4 +- tools/perf/Documentation/perf-top.txt | 4 +- tools/perf/builtin-record.c | 10 +-- tools/perf/builtin-stat.c | 31 ++++---- tools/perf/builtin-test.c | 2 - tools/perf/builtin-top.c | 12 ++- tools/perf/perf.h | 4 +- tools/perf/util/evlist.c | 10 +-- tools/perf/util/evlist.h | 4 +- tools/perf/util/evsel.c | 2 +- tools/perf/util/python-ext-sources | 2 + tools/perf/util/thread_map.c | 128 +++++++++++++++++++++++++++++++ tools/perf/util/thread_map.h | 4 + tools/perf/util/top.c | 10 +-- tools/perf/util/top.h | 2 +- tools/perf/util/usage.c | 6 +- tools/perf/util/util.h | 2 +- 18 files changed, 185 insertions(+), 56 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index ff9a66e0d4e4..a5766b4b0125 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -52,11 +52,11 @@ OPTIONS -p:: --pid=:: - Record events on existing process ID. + Record events on existing process ID (comma separated list). -t:: --tid=:: - Record events on existing thread ID. + Record events on existing thread ID (comma separated list). -u:: --uid=:: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8966b9ab2014..2fa173b51970 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -35,11 +35,11 @@ OPTIONS child tasks do not inherit counters -p:: --pid=:: - stat events on existing process id + stat events on existing process id (comma separated list) -t:: --tid=:: - stat events on existing thread id + stat events on existing thread id (comma separated list) -a:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index ab1454ed450f..4a5680cb242e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -72,11 +72,11 @@ Default is to monitor all CPUS. -p :: --pid=:: - Profile events on existing Process ID. + Profile events on existing Process ID (comma separated list). -t :: --tid=:: - Profile events on existing thread ID. + Profile events on existing thread ID (comma separated list). -u:: --uid=:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d6d1c6cdcfe6..08ed24b66ffe 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -645,8 +645,6 @@ static const char * const record_usage[] = { */ static struct perf_record record = { .opts = { - .target_pid = -1, - .target_tid = -1, .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, @@ -670,9 +668,9 @@ const struct option record_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter), - OPT_INTEGER('p', "pid", &record.opts.target_pid, + OPT_STRING('p', "pid", &record.opts.target_pid, "pid", "record events on existing process id"), - OPT_INTEGER('t', "tid", &record.opts.target_tid, + OPT_STRING('t', "tid", &record.opts.target_tid, "tid", "record events on existing thread id"), OPT_INTEGER('r', "realtime", &record.realtime_prio, "collect data with this RT SCHED_FIFO priority"), @@ -739,7 +737,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && + if (!argc && !rec->opts.target_pid && !rec->opts.target_tid && !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) usage_with_options(record_usage, record_options); @@ -785,7 +783,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) goto out_free_fd; - if (rec->opts.target_pid != -1) + if (rec->opts.target_pid) rec->opts.target_tid = rec->opts.target_pid; if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d14b37ad7638..ea40e4e8b227 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -182,8 +182,8 @@ static int run_count = 1; static bool no_inherit = false; static bool scale = true; static bool no_aggr = false; -static pid_t target_pid = -1; -static pid_t target_tid = -1; +static const char *target_pid; +static const char *target_tid; static pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, if (system_wide) return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group, group_fd); - if (target_pid == -1 && target_tid == -1) { + if (!target_pid && !target_tid) { attr->disabled = 1; attr->enable_on_exec = 1; } @@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv) exit(-1); } - if (target_tid == -1 && target_pid == -1 && !system_wide) + if (!target_tid && !target_pid && !system_wide) evsel_list->threads->map[0] = child_pid; /* @@ -968,14 +968,14 @@ static void print_stat(int argc, const char **argv) if (!csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); - if(target_pid == -1 && target_tid == -1) { + if (!target_pid && !target_tid) { fprintf(output, "\'%s", argv[0]); for (i = 1; i < argc; i++) fprintf(output, " %s", argv[i]); - } else if (target_pid != -1) - fprintf(output, "process id \'%d", target_pid); + } else if (target_pid) + fprintf(output, "process id \'%s", target_pid); else - fprintf(output, "thread id \'%d", target_tid); + fprintf(output, "thread id \'%s", target_tid); fprintf(output, "\'"); if (run_count > 1) @@ -1049,10 +1049,10 @@ static const struct option options[] = { "event filter", parse_filter), OPT_BOOLEAN('i', "no-inherit", &no_inherit, "child tasks do not inherit counters"), - OPT_INTEGER('p', "pid", &target_pid, - "stat events on existing process id"), - OPT_INTEGER('t', "tid", &target_tid, - "stat events on existing thread id"), + OPT_STRING('p', "pid", &target_pid, "pid", + "stat events on existing process id"), + OPT_STRING('t', "tid", &target_tid, "tid", + "stat events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, @@ -1190,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - if (!argc && target_pid == -1 && target_tid == -1) + if (!argc && !target_pid && !target_tid) usage_with_options(stat_usage, options); if (run_count <= 0) usage_with_options(stat_usage, options); @@ -1206,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (add_default_attributes()) goto out; - if (target_pid != -1) + if (target_pid) target_tid = target_pid; - evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX); + evsel_list->threads = thread_map__new_str(target_pid, + target_tid, UINT_MAX); if (evsel_list->threads == NULL) { pr_err("Problems finding threads of monitor\n"); usage_with_options(stat_usage, options); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 70c4eb2bdf72..0f151952a770 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1010,8 +1010,6 @@ realloc: static int test__PERF_RECORD(void) { struct perf_record_opts opts = { - .target_pid = -1, - .target_tid = -1, .no_delay = true, .freq = 10, .mmap_pages = 256, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d869b214ada2..94d55cb2048c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -965,7 +965,7 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - if (top->target_tid != -1 || top->uid != UINT_MAX) + if (top->target_tid || top->uid != UINT_MAX) perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, perf_event__process, &top->session->host_machine); @@ -1103,8 +1103,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) struct perf_top top = { .count_filter = 5, .delay_secs = 2, - .target_pid = -1, - .target_tid = -1, .uid = UINT_MAX, .freq = 1000, /* 1 KHz */ .sample_id_all_avail = true, @@ -1118,9 +1116,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) parse_events_option), OPT_INTEGER('c', "count", &top.default_interval, "event period to sample"), - OPT_INTEGER('p', "pid", &top.target_pid, + OPT_STRING('p', "pid", &top.target_pid, "pid", "profile events on existing process id"), - OPT_INTEGER('t', "tid", &top.target_tid, + OPT_STRING('t', "tid", &top.target_tid, "tid", "profile events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &top.system_wide, "system-wide collection from all CPUs"), @@ -1210,13 +1208,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) goto out_delete_evlist; /* CPU and PID are mutually exclusive */ - if (top.target_tid > 0 && top.cpu_list) { + if (top.target_tid && top.cpu_list) { printf("WARNING: PID switch overriding CPU\n"); sleep(1); top.cpu_list = NULL; } - if (top.target_pid != -1) + if (top.target_pid) top.target_tid = top.target_pid; if (perf_evlist__create_maps(top.evlist, top.target_pid, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 92af1688bae4..deb17dba4a5b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -186,8 +186,8 @@ extern const char perf_version_string[]; void pthread__unblock_sigwinch(void); struct perf_record_opts { - pid_t target_pid; - pid_t target_tid; + const char *target_pid; + const char *target_tid; uid_t uid; bool call_graph; bool group; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a57a8cfc5d90..5c61dc57d7c7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -593,15 +593,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, prot, mask); } -int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, uid_t uid, const char *cpu_list) +int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, + const char *target_tid, uid_t uid, const char *cpu_list) { - evlist->threads = thread_map__new(target_pid, target_tid, uid); + evlist->threads = thread_map__new_str(target_pid, target_tid, uid); if (evlist->threads == NULL) return -1; - if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1)) + if (uid != UINT_MAX || (cpu_list == NULL && target_tid)) evlist->cpus = cpu_map__dummy_new(); else evlist->cpus = cpu_map__new(cpu_list); @@ -820,7 +820,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, exit(-1); } - if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) + if (!opts->system_wide && !opts->target_tid && !opts->target_pid) evlist->threads->map[0] = evlist->workload.pid; close(child_ready_pipe[1]); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1b4282be8fe7..21f1c9e57f13 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, evlist->threads = threads; } -int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t tid, uid_t uid, const char *cpu_list); +int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, + const char *tid, uid_t uid, const char *cpu_list); void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9a11f9edac12..f910f50136d0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -130,7 +130,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) attr->mmap = track; attr->comm = track; - if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { + if (!opts->target_pid && !opts->target_tid && !opts->system_wide) { attr->disabled = 1; attr->enable_on_exec = 1; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index ff606f482a7c..2884e67ee625 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -15,3 +15,5 @@ util/util.c util/xyarray.c util/cgroup.c util/debugfs.c +util/strlist.c +../../lib/rbtree.c diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 3d4b6c5931b9..e15983cf077d 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -6,6 +6,8 @@ #include #include #include +#include "strlist.h" +#include #include "thread_map.h" /* Skip "." and ".." directories */ @@ -152,6 +154,132 @@ struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) return thread_map__new_by_tid(tid); } +static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) +{ + struct thread_map *threads = NULL, *nt; + char name[256]; + int items, total_tasks = 0; + struct dirent **namelist = NULL; + int i, j = 0; + pid_t pid, prev_pid = INT_MAX; + char *end_ptr; + struct str_node *pos; + struct strlist *slist = strlist__new(false, pid_str); + + if (!slist) + return NULL; + + strlist__for_each(pos, slist) { + pid = strtol(pos->s, &end_ptr, 10); + + if (pid == INT_MIN || pid == INT_MAX || + (*end_ptr != '\0' && *end_ptr != ',')) + goto out_free_threads; + + if (pid == prev_pid) + continue; + + sprintf(name, "/proc/%d/task", pid); + items = scandir(name, &namelist, filter, NULL); + if (items <= 0) + goto out_free_threads; + + total_tasks += items; + nt = realloc(threads, (sizeof(*threads) + + sizeof(pid_t) * total_tasks)); + if (nt == NULL) + goto out_free_threads; + + threads = nt; + + if (threads) { + for (i = 0; i < items; i++) + threads->map[j++] = atoi(namelist[i]->d_name); + threads->nr = total_tasks; + } + + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + + if (!threads) + break; + } + +out: + strlist__delete(slist); + return threads; + +out_free_threads: + free(threads); + threads = NULL; + goto out; +} + +static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) +{ + struct thread_map *threads = NULL, *nt; + int ntasks = 0; + pid_t tid, prev_tid = INT_MAX; + char *end_ptr; + struct str_node *pos; + struct strlist *slist; + + /* perf-stat expects threads to be generated even if tid not given */ + if (!tid_str) { + threads = malloc(sizeof(*threads) + sizeof(pid_t)); + if (threads != NULL) { + threads->map[1] = -1; + threads->nr = 1; + } + return threads; + } + + slist = strlist__new(false, tid_str); + if (!slist) + return NULL; + + strlist__for_each(pos, slist) { + tid = strtol(pos->s, &end_ptr, 10); + + if (tid == INT_MIN || tid == INT_MAX || + (*end_ptr != '\0' && *end_ptr != ',')) + goto out_free_threads; + + if (tid == prev_tid) + continue; + + ntasks++; + nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); + + if (nt == NULL) + goto out_free_threads; + + threads = nt; + threads->map[ntasks - 1] = tid; + threads->nr = ntasks; + } +out: + return threads; + +out_free_threads: + free(threads); + threads = NULL; + goto out; +} + +struct thread_map *thread_map__new_str(const char *pid, const char *tid, + uid_t uid) +{ + if (pid) + return thread_map__new_by_pid_str(pid); + + if (!tid && uid != UINT_MAX) + return thread_map__new_by_uid(uid); + + return thread_map__new_by_tid_str(tid); +} + void thread_map__delete(struct thread_map *threads) { free(threads); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index c75ddbaba005..7da80f14418b 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -13,6 +13,10 @@ struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); + +struct thread_map *thread_map__new_str(const char *pid, + const char *tid, uid_t uid); + void thread_map__delete(struct thread_map *threads); size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index e4370ca27193..09fe579ccafb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -69,11 +69,11 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) ret += SNPRINTF(bf + ret, size - ret, "], "); - if (top->target_pid != -1) - ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", + if (top->target_pid) + ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", top->target_pid); - else if (top->target_tid != -1) - ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", + else if (top->target_tid) + ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", top->target_tid); else if (top->uid_str != NULL) ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", @@ -85,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); else { - if (top->target_tid != -1) + if (top->target_tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index def3e53e0fe0..49eb8481f19f 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -23,7 +23,7 @@ struct perf_top { u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; int freq; - pid_t target_pid, target_tid; + const char *target_pid, *target_tid; uid_t uid; bool hide_kernel_symbols, hide_user_symbols, zero; bool system_wide; diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d0c013934f30..52bb07c6442a 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -83,7 +83,7 @@ void warning(const char *warn, ...) va_end(params); } -uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid) +uid_t parse_target_uid(const char *str, const char *tid, const char *pid) { struct passwd pwd, *result; char buf[1024]; @@ -91,8 +91,8 @@ uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid) if (str == NULL) return UINT_MAX; - /* CPU and PID are mutually exclusive */ - if (tid > 0 || pid > 0) { + /* UID and PID are mutually exclusive */ + if (tid || pid) { ui__warning("PID/TID switch overriding UID\n"); sleep(1); return UINT_MAX; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 232d17ef3e60..7917b09430bd 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -245,7 +245,7 @@ struct perf_event_attr; void event_attr_init(struct perf_event_attr *attr); -uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid); +uid_t parse_target_uid(const char *str, const char *tid, const char *pid); #define _STR(x) #x #define STR(x) _STR(x) -- cgit v1.2.3 From 7e1ccd3804281fc0755eb726b654469c40a96d89 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 9 Feb 2012 16:12:38 +0100 Subject: perf tools: cleanup initialization of attr->size The perf_event_attr size needs to be initialized in all cases because it captures the ABI version. This patch moves the initialization of the field from the perf_event_open() syscall stub to its proper location in the event_attr_init(). Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120209151238.GA10272@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 - tools/perf/util/util.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index deb17dba4a5b..03a045618148 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -167,7 +167,6 @@ sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { - attr->size = sizeof(*attr); return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index fb25d1329218..8109a907841e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr) attr->exclude_host = 1; if (!perf_guest) attr->exclude_guest = 1; + /* to capture ABI version */ + attr->size = sizeof(*attr); } int mkdir_p(char *path, mode_t mode) -- cgit v1.2.3 From 0c9781280fb672ca09c997df3f14ba506bbdb977 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Feb 2012 14:05:30 -0200 Subject: perf tools: Handle kernels that don't support attr.exclude_{guest,host} Just fall back to resetting those fields, if set, warning the user that that feature is not available. If guest samples appear they will just be discarded because no struct machine will be found and thus the event will be accounted as not handled and dropped, see 0c09571. Reported-by: Namhyung Kim Tested-by: Joerg Roedel Cc: David Ahern Cc: Frederic Weisbecker Cc: Joerg Roedel Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vuwxig36mzprl5n7nzvnxxsh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 29 ++++++++++++++++++++--------- tools/perf/builtin-top.c | 23 +++++++++++++++++------ tools/perf/perf.h | 1 + tools/perf/util/top.h | 1 + 4 files changed, 39 insertions(+), 15 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 08ed24b66ffe..d6c10e8ff6b6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -205,6 +205,9 @@ static void perf_record__open(struct perf_record *rec) if (opts->group && pos != first) group_fd = first->fd; +fallback_missing_features: + if (opts->exclude_guest_missing) + attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; try_again: @@ -218,15 +221,23 @@ try_again: } else if (err == ENODEV && opts->cpu_list) { die("No such device - did you specify" " an out-of-range profile CPU?\n"); - } else if (err == EINVAL && opts->sample_id_all_avail) { - /* - * Old kernel, no attr->sample_id_type_all field - */ - opts->sample_id_all_avail = false; - if (!opts->sample_time && !opts->raw_samples && !time_needed) - attr->sample_type &= ~PERF_SAMPLE_TIME; - - goto retry_sample_id; + } else if (err == EINVAL) { + if (!opts->exclude_guest_missing && + (attr->exclude_guest || attr->exclude_host)) { + pr_debug("Old kernel, cannot exclude " + "guest or host samples.\n"); + opts->exclude_guest_missing = true; + goto fallback_missing_features; + } else if (opts->sample_id_all_avail) { + /* + * Old kernel, no attr->sample_id_type_all field + */ + opts->sample_id_all_avail = false; + if (!opts->sample_time && !opts->raw_samples && !time_needed) + attr->sample_type &= ~PERF_SAMPLE_TIME; + + goto retry_sample_id; + } } /* diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5a88c0dadf15..02e11ff5a729 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -872,6 +872,9 @@ static void perf_top__start_counters(struct perf_top *top) attr->mmap = 1; attr->comm = 1; attr->inherit = top->inherit; +fallback_missing_features: + if (top->exclude_guest_missing) + attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; try_again: @@ -883,12 +886,20 @@ try_again: if (err == EPERM || err == EACCES) { ui__error_paranoid(); goto out_err; - } else if (err == EINVAL && top->sample_id_all_avail) { - /* - * Old kernel, no attr->sample_id_type_all field - */ - top->sample_id_all_avail = false; - goto retry_sample_id; + } else if (err == EINVAL) { + if (!top->exclude_guest_missing && + (attr->exclude_guest || attr->exclude_host)) { + pr_debug("Old kernel, cannot exclude " + "guest or host samples.\n"); + top->exclude_guest_missing = true; + goto fallback_missing_features; + } else if (top->sample_id_all_avail) { + /* + * Old kernel, no attr->sample_id_type_all field + */ + top->sample_id_all_avail = false; + goto retry_sample_id; + } } /* * If it's cycles then fall back to hrtimer diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 03a045618148..8b9c43635f4d 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -199,6 +199,7 @@ struct perf_record_opts { bool sample_address; bool sample_time; bool sample_id_all_avail; + bool exclude_guest_missing; bool system_wide; bool period; unsigned int freq; diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 49eb8481f19f..7dea8912aa97 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -35,6 +35,7 @@ struct perf_top { bool inherit; bool group; bool sample_id_all_avail; + bool exclude_guest_missing; bool dump_symtab; const char *cpu_list; struct hist_entry *sym_filter_entry; -- cgit v1.2.3 From 808e122630d45a7f036d25582474d70548a87e2c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Feb 2012 14:18:57 -0200 Subject: perf tools: Invert the sample_id_all logic Instead of requiring that users of perf_record_opts set .sample_id_all_avail to true, just invert the logic, using .sample_id_all_missing, that doesn't need to be explicitely initialized since gcc will zero members ommitted in a struct initialization. Just like the newly introduced .exclude_{guest,host} feature test. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ab772uzk78cwybihf0vt7kxw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 7 +++---- tools/perf/builtin-test.c | 1 - tools/perf/builtin-top.c | 7 +++---- tools/perf/perf.h | 2 +- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/top.h | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d6c10e8ff6b6..75d230fef202 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -209,7 +209,7 @@ fallback_missing_features: if (opts->exclude_guest_missing) attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: - attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; + attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(pos, evlist->cpus, evlist->threads, opts->group, group_fd) < 0) { @@ -228,11 +228,11 @@ try_again: "guest or host samples.\n"); opts->exclude_guest_missing = true; goto fallback_missing_features; - } else if (opts->sample_id_all_avail) { + } else if (!opts->sample_id_all_missing) { /* * Old kernel, no attr->sample_id_type_all field */ - opts->sample_id_all_avail = false; + opts->sample_id_all_missing = true; if (!opts->sample_time && !opts->raw_samples && !time_needed) attr->sample_type &= ~PERF_SAMPLE_TIME; @@ -660,7 +660,6 @@ static struct perf_record record = { .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .freq = 1000, - .sample_id_all_avail = true, }, .write_mode = WRITE_FORCE, .file_new = true, diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 0f151952a770..3e087ce8daa6 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1013,7 +1013,6 @@ static int test__PERF_RECORD(void) .no_delay = true, .freq = 10, .mmap_pages = 256, - .sample_id_all_avail = true, }; cpu_set_t *cpu_mask = NULL; size_t cpu_mask_size = 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 02e11ff5a729..e3c63aef8efc 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -876,7 +876,7 @@ fallback_missing_features: if (top->exclude_guest_missing) attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: - attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; + attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads, top->group, @@ -893,11 +893,11 @@ try_again: "guest or host samples.\n"); top->exclude_guest_missing = true; goto fallback_missing_features; - } else if (top->sample_id_all_avail) { + } else if (!top->sample_id_all_missing) { /* * Old kernel, no attr->sample_id_type_all field */ - top->sample_id_all_avail = false; + top->sample_id_all_missing = true; goto retry_sample_id; } } @@ -1122,7 +1122,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) .delay_secs = 2, .uid = UINT_MAX, .freq = 1000, /* 1 KHz */ - .sample_id_all_avail = true, .mmap_pages = 128, .sym_pcnt_filter = 5, }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 8b9c43635f4d..f0227e93665d 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -198,7 +198,7 @@ struct perf_record_opts { bool raw_samples; bool sample_address; bool sample_time; - bool sample_id_all_avail; + bool sample_id_all_missing; bool exclude_guest_missing; bool system_wide; bool period; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f910f50136d0..302d49a9f985 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ - attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; + attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; attr->inherit = !opts->no_inherit; attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | @@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) if (opts->period) attr->sample_type |= PERF_SAMPLE_PERIOD; - if (opts->sample_id_all_avail && + if (!opts->sample_id_all_missing && (opts->sample_time || opts->system_wide || !opts->no_inherit || opts->cpu_list)) attr->sample_type |= PERF_SAMPLE_TIME; diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 7dea8912aa97..ce61cb2d1acf 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -34,7 +34,7 @@ struct perf_top { bool vmlinux_warned; bool inherit; bool group; - bool sample_id_all_avail; + bool sample_id_all_missing; bool exclude_guest_missing; bool dump_symtab; const char *cpu_list; -- cgit v1.2.3 From b5387528f31d98acedf06e930554b563d87e2383 Mon Sep 17 00:00:00 2001 From: Roberto Agostino Vitillo Date: Thu, 9 Feb 2012 23:21:01 +0100 Subject: perf tools: Add code to support PERF_SAMPLE_BRANCH_STACK This patch adds: - ability to parse samples with PERF_SAMPLE_BRANCH_STACK - sort on branches (dso_from, symbol_from, dso_to, symbol_to, mispredict) - build histograms on branches Signed-off-by: Roberto Agostino Vitillo Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: robert.richter@amd.com Cc: ming.m.lin@intel.com Cc: andi@firstfloor.org Cc: asharma@fb.com Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1328826068-11713-12-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/perf.h | 17 +++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 10 ++ tools/perf/util/hist.c | 122 +++++++++++++++----- tools/perf/util/hist.h | 11 ++ tools/perf/util/session.c | 72 ++++++++++++ tools/perf/util/session.h | 4 + tools/perf/util/sort.c | 287 ++++++++++++++++++++++++++++++++++++++-------- tools/perf/util/sort.h | 9 ++ tools/perf/util/symbol.h | 13 +++ 10 files changed, 468 insertions(+), 78 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f0227e93665d..358f40135c4d 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -179,6 +179,23 @@ struct ip_callchain { u64 ips[0]; }; +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 reserved:62; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; + extern bool perf_host, perf_guest; extern const char perf_version_string[]; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -81,6 +81,7 @@ struct perf_sample { u32 raw_size; void *raw_data; struct ip_callchain *callchain; + struct branch_stack *branch_stack; }; #define BUILD_ID_SIZE 20 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 302d49a9f985..a1fd1cd2a5af 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -576,6 +576,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, data->raw_data = (void *) pdata; } + if (type & PERF_SAMPLE_BRANCH_STACK) { + u64 sz; + + data->branch_stack = (struct branch_stack *)array; + array++; /* nr */ + + sz = data->branch_stack->nr * sizeof(struct branch_entry); + sz /= sizeof(u64); + array += sz; + } return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac7..8380c3db1c92 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) hists__set_col_len(hists, col, 0); } +static void hists__set_unres_dso_col_len(struct hists *hists, int dso) +{ + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; + + if (hists__col_len(hists, dso) < unresolved_col_width && + !symbol_conf.col_width_list_str && !symbol_conf.field_sep && + !symbol_conf.dso_list) + hists__set_col_len(hists, dso, unresolved_col_width); +} + static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; if (h->ms.sym) - hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); - else { - const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - - if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && - !symbol_conf.col_width_list_str && !symbol_conf.field_sep && - !symbol_conf.dso_list) - hists__set_col_len(hists, HISTC_DSO, - unresolved_col_width); - } + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); + else + hists__set_unres_dso_col_len(hists, HISTC_DSO); len = thread__comm_len(h->thread); if (hists__new_col_len(hists, HISTC_COMM, len)) @@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) len = dso__name_len(h->ms.map->dso); hists__new_col_len(hists, HISTC_DSO, len); } + + if (h->branch_info) { + int symlen; + /* + * +4 accounts for '[x] ' priv level info + * +2 account of 0x prefix on raw addresses + */ + if (h->branch_info->from.sym) { + symlen = (int)h->branch_info->from.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + + symlen = dso__name_len(h->branch_info->from.map->dso); + hists__new_col_len(hists, HISTC_DSO_FROM, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); + } + + if (h->branch_info->to.sym) { + symlen = (int)h->branch_info->to.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + + symlen = dso__name_len(h->branch_info->to.map->dso); + hists__new_col_len(hists, HISTC_DSO_TO, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); + } + } } static void hist_entry__add_cpumode_period(struct hist_entry *he, @@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *hists, +static struct hist_entry *add_hist_entry(struct hists *hists, + struct hist_entry *entry, struct addr_location *al, - struct symbol *sym_parent, u64 period) + u64 period) { struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct hist_entry entry = { - .thread = al->thread, - .ms = { - .map = al->map, - .sym = al->sym, - }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, - .period = period, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - }; int cmp; pthread_mutex_lock(&hists->lock); @@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(&entry, he); + cmp = hist_entry__cmp(entry, he); if (!cmp) { he->period += period; @@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(&entry); + he = hist_entry__new(entry); if (!he) goto out_unlock; @@ -252,6 +275,51 @@ out_unlock: return he; } +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = bi->to.map, + .sym = bi->to.sym, + }, + .cpu = al->cpu, + .ip = bi->to.addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + .branch_info = bi, + }; + + return add_hist_entry(self, &entry, al, period); +} + +struct hist_entry *__hists__add_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .cpu = al->cpu, + .ip = al->addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + }; + + return add_hist_entry(self, &entry, al, period); +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e862..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -42,6 +42,11 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_MISPREDICT, + HISTC_SYMBOL_FROM, + HISTC_SYMBOL_TO, + HISTC_DSO_FROM, + HISTC_DSO_TO, HISTC_NR_COLS, /* Last entry */ }; @@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period); + void hists__output_resort(struct hists *self); void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f833cf9c6a9..bec8a328b1b8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -229,6 +229,63 @@ static bool symbol__match_parent_regex(struct symbol *sym) return 0; } +static const u8 cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL +}; +#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) + +static void ip__resolve_ams(struct machine *self, struct thread *thread, + struct addr_map_symbol *ams, + u64 ip) +{ + struct addr_location al; + size_t i; + u8 m; + + memset(&al, 0, sizeof(al)); + + for (i = 0; i < NCPUMODES; i++) { + m = cpumodes[i]; + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_addr_location(thread, self, m, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym) + goto found; + } +found: + ams->addr = ip; + ams->sym = al.sym; + ams->map = al.map; +} + +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thr, + struct branch_stack *bs) +{ + struct branch_info *bi; + unsigned int i; + + bi = calloc(bs->nr, sizeof(struct branch_info)); + if (!bi) + return NULL; + + for (i = 0; i < bs->nr; i++) { + ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); + bi[i].flags = bs->entries[i].flags; + } + return bi; +} + int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, @@ -697,6 +754,18 @@ static void callchain__printf(struct perf_sample *sample) i, sample->callchain->ips[i]); } +static void branch_stack__printf(struct perf_sample *sample) +{ + uint64_t i; + + printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", + i, sample->branch_stack->entries[i].from, + sample->branch_stack->entries[i].to); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -744,6 +813,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, if (session->sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); + + if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) + branch_stack__printf(sample); } static struct machine * diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c8d90178e7de..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel struct ip_callchain *chain, struct symbol **parent); +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thread, + struct branch_stack *bs); + bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 16da30d8d765..2739ed10d5e6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +bool sort__branch_mode; enum sort_type sort__first_dimension; @@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } +static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) +{ + struct dso *dso_l = map_l ? map_l->dso : NULL; + struct dso *dso_r = map_r ? map_r->dso : NULL; + const char *dso_name_l, *dso_name_r; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); +} + struct sort_entry sort_comm = { .se_header = "Command", .se_cmp = sort__comm_cmp, @@ -107,36 +128,74 @@ struct sort_entry sort_comm = { static int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { - struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; - struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; - const char *dso_name_l, *dso_name_r; + return _sort__dso_cmp(left->ms.map, right->ms.map); +} - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - if (verbose) { - dso_name_l = dso_l->long_name; - dso_name_r = dso_r->long_name; - } else { - dso_name_l = dso_l->short_name; - dso_name_r = dso_r->short_name; +static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, + u64 ip_l, u64 ip_r) +{ + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + if (sym_l == sym_r) + return 0; + + if (sym_l) + ip_l = sym_l->start; + if (sym_r) + ip_r = sym_r->start; + + return (int64_t)(ip_r - ip_l); +} + +static int _hist_entry__dso_snprintf(struct map *map, char *bf, + size_t size, unsigned int width) +{ + if (map && map->dso) { + const char *dso_name = !verbose ? map->dso->short_name : + map->dso->long_name; + return repsep_snprintf(bf, size, "%-*s", width, dso_name); } - return strcmp(dso_name_l, dso_name_r); + return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - if (self->ms.map && self->ms.map->dso) { - const char *dso_name = !verbose ? self->ms.map->dso->short_name : - self->ms.map->dso->long_name; - return repsep_snprintf(bf, size, "%-*s", width, dso_name); + return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); +} + +static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, + u64 ip, char level, char *bf, size_t size, + unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) { + char o = map ? dso__symtab_origin(map->dso) : '!'; + ret += repsep_snprintf(bf, size, "%-#*llx %c ", + BITS_PER_LONG / 4, ip, o); } - return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); + ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); + if (sym) + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, + sym->name); + else { + size_t len = BITS_PER_LONG / 4; + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", + len, ip); + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, ""); + } + + return ret; } + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, @@ -144,8 +203,14 @@ struct sort_entry sort_dso = { .se_width_idx = HISTC_DSO, }; -/* --sort symbol */ +static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, + self->level, bf, size, width); +} +/* --sort symbol */ static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) ip_l = left->ms.sym->start; ip_r = right->ms.sym->start; - return (int64_t)(ip_r - ip_l); -} - -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) -{ - size_t ret = 0; - - if (verbose) { - char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; - ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4, self->ip, o); - } - - if (!sort_dso.elide) - ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); - - if (self->ms.sym) - ret += repsep_snprintf(bf + ret, size - ret, "%s", - self->ms.sym->name); - else - ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", - BITS_PER_LONG / 4, self->ip); - - return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); } struct sort_entry sort_sym = { @@ -246,19 +287,155 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +static int64_t +sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->from.map, + right->branch_info->from.map); +} + +static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->from.map, + bf, size, width); +} + +struct sort_entry sort_dso_from = { + .se_header = "Source Shared Object", + .se_cmp = sort__dso_from_cmp, + .se_snprintf = hist_entry__dso_from_snprintf, + .se_width_idx = HISTC_DSO_FROM, +}; + +static int64_t +sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->to.map, + right->branch_info->to.map); +} + +static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->to.map, + bf, size, width); +} + +static int64_t +sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *from_l = &left->branch_info->from; + struct addr_map_symbol *from_r = &right->branch_info->from; + + if (!from_l->sym && !from_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, + from_r->addr); +} + +static int64_t +sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *to_l = &left->branch_info->to; + struct addr_map_symbol *to_r = &right->branch_info->to; + + if (!to_l->sym && !to_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); +} + +static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *from = &self->branch_info->from; + return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, + self->level, bf, size, width); + +} + +static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *to = &self->branch_info->to; + return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, + self->level, bf, size, width); + +} + +struct sort_entry sort_dso_to = { + .se_header = "Target Shared Object", + .se_cmp = sort__dso_to_cmp, + .se_snprintf = hist_entry__dso_to_snprintf, + .se_width_idx = HISTC_DSO_TO, +}; + +struct sort_entry sort_sym_from = { + .se_header = "Source Symbol", + .se_cmp = sort__sym_from_cmp, + .se_snprintf = hist_entry__sym_from_snprintf, + .se_width_idx = HISTC_SYMBOL_FROM, +}; + +struct sort_entry sort_sym_to = { + .se_header = "Target Symbol", + .se_cmp = sort__sym_to_cmp, + .se_snprintf = hist_entry__sym_to_snprintf, + .se_width_idx = HISTC_SYMBOL_TO, +}; + +static int64_t +sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) +{ + const unsigned char mp = left->branch_info->flags.mispred != + right->branch_info->flags.mispred; + const unsigned char p = left->branch_info->flags.predicted != + right->branch_info->flags.predicted; + + return mp || p; +} + +static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width){ + static const char *out = "N/A"; + + if (self->branch_info->flags.predicted) + out = "N"; + else if (self->branch_info->flags.mispred) + out = "Y"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_mispredict = { + .se_header = "Branch Mispredicted", + .se_cmp = sort__mispredict_cmp, + .se_snprintf = hist_entry__mispredict_snprintf, + .se_width_idx = HISTC_MISPREDICT, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; int taken; }; +#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } + static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, - { .name = "cpu", .entry = &sort_cpu, }, + DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_COMM, "comm", sort_comm), + DIM(SORT_DSO, "dso", sort_dso), + DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), + DIM(SORT_DSO_TO, "dso_to", sort_dso_to), + DIM(SORT_SYM, "symbol", sort_sym), + DIM(SORT_SYM_TO, "symbol_from", sort_sym_from), + DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to), + DIM(SORT_PARENT, "parent", sort_parent), + DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), }; int sort_dimension__add(const char *tok) @@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok) sort__first_dimension = SORT_PARENT; else if (!strcmp(sd->name, "cpu")) sort__first_dimension = SORT_CPU; + else if (!strcmp(sd->name, "symbol_from")) + sort__first_dimension = SORT_SYM_FROM; + else if (!strcmp(sd->name, "symbol_to")) + sort__first_dimension = SORT_SYM_TO; + else if (!strcmp(sd->name, "dso_from")) + sort__first_dimension = SORT_DSO_FROM; + else if (!strcmp(sd->name, "dso_to")) + sort__first_dimension = SORT_DSO_TO; + else if (!strcmp(sd->name, "mispredict")) + sort__first_dimension = SORT_MISPREDICT; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok) return 0; } - return -ESRCH; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..7aa72a00bc8e 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,11 +31,14 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern bool sort__branch_mode; extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; extern struct sort_entry sort_parent; +extern struct sort_entry sort_lbr_dso; +extern struct sort_entry sort_lbr_sym; extern enum sort_type sort__first_dimension; /** @@ -72,6 +75,7 @@ struct hist_entry { struct hist_entry *pair; struct rb_root sorted_chain; }; + struct branch_info *branch_info; struct callchain_root callchain[0]; }; @@ -82,6 +86,11 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_DSO_FROM, + SORT_DSO_TO, + SORT_SYM_FROM, + SORT_SYM_TO, + SORT_MISPREDICT, }; /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a683d4fc918..5866ce6b9c02 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,6 +5,7 @@ #include #include #include "map.h" +#include "../perf.h" #include #include #include @@ -120,6 +121,18 @@ struct map_symbol { bool has_children; }; +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; +}; + +struct branch_info { + struct addr_map_symbol from; + struct addr_map_symbol to; + struct branch_flags flags; +}; + struct addr_location { struct thread *thread; struct map *map; -- cgit v1.2.3 From bdfebd848f2a14e639031a0b0e61d7c7ee5e5fd2 Mon Sep 17 00:00:00 2001 From: Roberto Agostino Vitillo Date: Thu, 9 Feb 2012 23:21:02 +0100 Subject: perf record: Add support for sampling taken branch This patch adds a new option to enable taken branch stack sampling, i.e., leverage the PERF_SAMPLE_BRANCH_STACK feature of perf_events. There is a new option to active this mode: -b. It is possible to pass a set of filters to select the type of branches to sample. The following filters are available: - any : any type of branches - any_call : any function call or system call - any_ret : any function return or system call return - any_ind : any indirect branch - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the branch target is in the hypervisor Filters can be combined by passing a comma separated list to the option: $ perf record -b any_call,u -e cycles:u branchy Signed-off-by: Roberto Agostino Vitillo Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: robert.richter@amd.com Cc: ming.m.lin@intel.com Cc: andi@firstfloor.org Cc: asharma@fb.com Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1328826068-11713-13-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 25 +++++++++++ tools/perf/builtin-record.c | 74 ++++++++++++++++++++++++++++++++ tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 4 ++ 4 files changed, 104 insertions(+) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a5766b4b0125..60bddaf0e5bd 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -152,6 +152,31 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha corresponding events, i.e., they always refer to events defined earlier on the command line. +-b:: +--branch-stack:: +Enable taken branch stack sampling. Each sample captures a series of consecutive +taken branches. The number of branches captured with each sample depends on the +underlying hardware, the type of branches of interest, and the executed code. +It is possible to select the types of branches captured by enabling filters. The +following filters are defined: + + - any : any type of branches + - any_call: any function call or system call + - any_ret: any function return or system call return + - any_ind: any indirect branch + - u: only when the branch target is at the user level + - k: only when the branch target is in the kernel + - hv: only when the target is at the hypervisor level + ++ +At least one of any, any_call, any_ret, any_ind must be provided. The privilege levels may +be ommitted, in which case, the privilege levels of the associated event are applied to the +branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to +permissions. When sampling on multiple events, branch stack sampling is enabled for all +the sampling events. The sampled branch type is the same for all events. +Note that taken branch sampling may not be available on all processors. +The various filters must be specified as a comma separated list: -b any_ret,u,k + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef202..1c49d4e8767c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -638,6 +638,77 @@ out_delete_session: return err; } +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_END +}; + +static int +parse_branch_stack(const struct option *opt, const char *str, int unset __used) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os, *p; + int ret = -1; + + *mode = 0; + + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) + goto error; + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + ret = 0; + + if ((*mode & ~ONLY_PLM) == 0) { + error("need at least one branch type with -b\n"); + ret = -1; + } +error: + free(os); + return ret; +} + static const char * const record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -727,6 +798,9 @@ const struct option record_options[] = { "monitor event in cgroup name only", parse_cgroups), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), + OPT_CALLBACK('b', "branch-stack", &record.opts.branch_stack, + "branch mode mask", "branch stack sampling modes", + parse_branch_stack), OPT_END() }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 358f40135c4d..eec392e48067 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -222,6 +222,7 @@ struct perf_record_opts { unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; + int branch_stack; u64 default_interval; u64 user_interval; const char *cpu_list; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a1fd1cd2a5af..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) attr->watermark = 0; attr->wakeup_events = 1; } + if (opts->branch_stack) { + attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; + attr->branch_sample_type = opts->branch_stack; + } attr->mmap = track; attr->comm = track; -- cgit v1.2.3