From e2e1680fda1573ebfdd6bba5d58f978044746993 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 24 Oct 2016 13:56:52 -0700 Subject: perf bench futex: Avoid worker cacheline bouncing Sebastian noted that overhead for worker thread ops (throughput) accounting was causing 'perf' to appear in the profiles, consuming a non-trivial (i.e. 13%) amount of CPU. This is due to cacheline bouncing due to the increment of w->ops. We can easily fix this by just working on a local copy and updating the actual worker once done running, and ready to show the program summary. There is no danger of the worker being concurrent, so we can trust that no stale value is being seen by another thread. This also gets rid of the unnecessary cache alignment hack; it's not worth it. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Davidlohr Bueso Acked-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/1477342613-9938-2-git-send-email-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 11 +++++------ tools/perf/bench/futex-lock-pi.c | 4 +++- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index d9e5e80bb4d0..da04b8c5568a 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -39,15 +39,12 @@ static unsigned int threads_starting; static struct stats throughput_stats; static pthread_cond_t thread_parent, thread_worker; -#define SMP_CACHE_BYTES 256 -#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES))) - struct worker { int tid; u_int32_t *futex; pthread_t thread; unsigned long ops; -} __cacheline_aligned; +}; static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), @@ -66,8 +63,9 @@ static const char * const bench_futex_hash_usage[] = { static void *workerfn(void *arg) { int ret; - unsigned int i; struct worker *w = (struct worker *) arg; + 
unsigned int i; + unsigned long ops = w->ops; /* avoid cacheline bouncing */ pthread_mutex_lock(&thread_lock); threads_starting--; @@ -77,7 +75,7 @@ static void *workerfn(void *arg) pthread_mutex_unlock(&thread_lock); do { - for (i = 0; i < nfutexes; i++, w->ops++) { + for (i = 0; i < nfutexes; i++, ops++) { /* * We want the futex calls to fail in order to stress * the hashing of uaddr and not measure other steps, @@ -91,6 +89,7 @@ static void *workerfn(void *arg) } } while (!done); + w->ops = ops; return NULL; } diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 936d89d30483..7032e4643c65 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -75,6 +75,7 @@ static void toggle_done(int sig __maybe_unused, static void *workerfn(void *arg) { struct worker *w = (struct worker *) arg; + unsigned long ops = w->ops; pthread_mutex_lock(&thread_lock); threads_starting--; @@ -103,9 +104,10 @@ static void *workerfn(void *arg) if (ret && !silent) warn("thread %d: Could not unlock pi-lock for %p (%d)", w->tid, w->futex, ret); - w->ops++; /* account for thread's share of work */ + ops++; /* account for thread's share of work */ } while (!done); + w->ops = ops; return NULL; } -- cgit v1.2.3 From 60758d6668b3e2fa8e5fd143d24d0425203d007e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 24 Oct 2016 13:56:53 -0700 Subject: perf bench futex: Sanitize numeric parameters This gets rid of oddities such as: perf bench futex hash -t -4 perf: calloc: Cannot allocate memory Runtime (and many more) are equally busted, i.e. run for bogus amounts of time. Just use the abs, instead of, for example errorring out. Committer note: After the patch: $ perf bench futex hash -t -4 # Running 'futex/hash' benchmark: Run summary [PID 10178]: 4 threads, each operating on 1024 [private] futexes for 10 secs. [thread 0] futexes: 0x34f9fa0 ... 0x34faf9c [ 4702208 ops/sec ] [thread 1] futexes: 0x34fb140 ... 
0x34fc13c [ 4707020 ops/sec ] [thread 2] futexes: 0x34fc2e0 ... 0x34fd2dc [ 4711526 ops/sec ] [thread 3] futexes: 0x34fd480 ... 0x34fe47c [ 4709683 ops/sec ] Averaged 4707609 operations/sec (+- 0.04%), total secs = 10 $ Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1477342613-9938-3-git-send-email-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 4 ++++ tools/perf/bench/futex-lock-pi.c | 3 +++ tools/perf/bench/futex-requeue.c | 2 ++ tools/perf/bench/futex-wake-parallel.c | 4 ++++ tools/perf/bench/futex-wake.c | 3 +++ tools/perf/bench/futex.h | 4 ++++ 6 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index da04b8c5568a..bfbb6b5f609c 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -130,6 +130,8 @@ int bench_futex_hash(int argc, const char **argv, } ncpus = sysconf(_SC_NPROCESSORS_ONLN); + nsecs = futexbench_sanitize_numeric(nsecs); + nfutexes = futexbench_sanitize_numeric(nfutexes); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; @@ -137,6 +139,8 @@ int bench_futex_hash(int argc, const char **argv, if (!nthreads) /* default to the number of CPUs */ nthreads = ncpus; + else + nthreads = futexbench_sanitize_numeric(nthreads); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 7032e4643c65..465012b320ee 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -152,6 +152,7 @@ int bench_futex_lock_pi(int argc, const char **argv, goto err; ncpus = sysconf(_SC_NPROCESSORS_ONLN); + nsecs = futexbench_sanitize_numeric(nsecs); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; @@ -159,6 +160,8 @@ int bench_futex_lock_pi(int argc, const char **argv, if (!nthreads) nthreads = ncpus; + else + nthreads = 
futexbench_sanitize_numeric(nthreads); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2b9705a8734c..fd4ee95b689a 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -128,6 +128,8 @@ int bench_futex_requeue(int argc, const char **argv, if (!nthreads) nthreads = ncpus; + else + nthreads = futexbench_sanitize_numeric(nthreads); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 2c8fa67ad537..beaa6c142477 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -217,8 +217,12 @@ int bench_futex_wake_parallel(int argc, const char **argv, sigaction(SIGINT, &act, NULL); ncpus = sysconf(_SC_NPROCESSORS_ONLN); + nwaking_threads = futexbench_sanitize_numeric(nwaking_threads); + if (!nblocked_threads) nblocked_threads = ncpus; + else + nblocked_threads = futexbench_sanitize_numeric(nblocked_threads); /* some sanity checks */ if (nwaking_threads > nblocked_threads || !nwaking_threads) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index e246b1b8388a..46efcb98b5a4 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -129,6 +129,7 @@ int bench_futex_wake(int argc, const char **argv, } ncpus = sysconf(_SC_NPROCESSORS_ONLN); + nwakes = futexbench_sanitize_numeric(nwakes); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; @@ -136,6 +137,8 @@ int bench_futex_wake(int argc, const char **argv, if (!nthreads) nthreads = ncpus; + else + nthreads = futexbench_sanitize_numeric(nthreads); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b2e06d1190d0..ba7c735c0c62 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -7,6 +7,7 @@ #ifndef _FUTEX_H #define 
_FUTEX_H +#include #include #include #include @@ -99,4 +100,7 @@ static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, } #endif +/* User input sanitation */ +#define futexbench_sanitize_numeric(__n) abs((__n)) + #endif /* _FUTEX_H */ -- cgit v1.2.3 From 8a06b0be6507f97f3aa92ca814335b8b65fd3de2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Oct 2016 01:21:10 +0900 Subject: perf hist browser: Fix hierarchy column counts The perf report/top TUI supports horizontal scrolling using the LEFT and RIGHT keys. But it calculates the number of columns incorrectly when hierarchy mode is enabled, so repeatedly pressing the RIGHT key can make the output disappear. In the hierarchy mode, all sort keys are collapsed into a single column, so this needs to be taken into account when calculating the number of columns. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024162110.17918-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ddc4c3e59cc1..84f5dd2fb59c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser, browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; - hists__for_each_format(hists, fmt) + if (symbol_conf.report_hierarchy) { + struct perf_hpp_list_node *fmt_node; + + /* count overhead columns (in the first node) */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + ++browser->b.columns; + + /* add a single column for whole hierarchy sort keys*/ ++browser->b.columns; + } else { + 
hists__for_each_format(hists, fmt) + ++browser->b.columns; + } hists__reset_column_width(hists); } -- cgit v1.2.3 From 369a2478973a416a2c42a37a8cf7031872a6d926 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Oct 2016 12:00:02 +0900 Subject: tools lib subcmd: Support cascading options Sometimes subcommands have common options that can only be handled in the upper level command unless each subcommand duplicates them. This patch adds a parent field and falls back to the parent if the given argument was not found in the current options. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20161024030003.28534-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.c | 14 ++++++++++++++ tools/lib/subcmd/parse-options.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 981bb4481fd5..3284bb14ae78 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -314,12 +314,19 @@ static int get_value(struct parse_opt_ctx_t *p, static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) { +retry: for (; options->type != OPTION_END; options++) { if (options->short_name == *p->opt) { p->opt = p->opt[1] ? 
p->opt + 1 : NULL; return get_value(p, options, OPT_SHORT); } } + + if (options->parent) { + options = options->parent; + goto retry; + } + return -2; } @@ -333,6 +340,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, if (!arg_end) arg_end = arg + strlen(arg); +retry: for (; options->type != OPTION_END; options++) { const char *rest; int flags = 0; @@ -426,6 +434,12 @@ match: } if (abbrev_option) return get_value(p, abbrev_option, abbrev_flags); + + if (options->parent) { + options = options->parent; + goto retry; + } + return -2; } diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index d60cab2726da..8866ac438b34 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -109,11 +109,13 @@ struct option { intptr_t defval; bool *set; void *data; + const struct option *parent; }; #define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) #define OPT_END() { .type = OPTION_END } +#define OPT_PARENT(p) { .type = OPTION_END, .parent = (p) } #define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } -- cgit v1.2.3 From 77f02f44460ab1480af2ae6145a1a85b9fe0b8ac Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Oct 2016 12:00:03 +0900 Subject: perf sched: Make common options cascading The -i and -v options can be used in subcommands so enable cascading the sched_options. This fixes the following inconvenience in 'perf sched': $ perf sched -i perf.data.sched map ... (it works well) ... 
$ perf sched map -i perf.data.sched Error: unknown switch `i' Usage: perf sched map [] --color-cpus highlight given CPUs in map --color-pids highlight given pids in map --compact map output in compact mode --cpus display given CPUs in map With this patch, the second command line works with the perf.data.sched data file. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20161024030003.28534-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f5503ca22e1c..8ca1b5409289 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1954,6 +1954,15 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .next_shortname2 = '0', .skip_merge = 0, }; + const struct option sched_options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; const struct option latency_options[] = { OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", "sort by key(s): runtime, switch, avg, max"), @@ -1965,7 +1974,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "dump raw trace in ASCII"), OPT_BOOLEAN('p', "pids", &sched.skip_merge, "latency stats per pid instead of per comm"), - OPT_END() + OPT_PARENT(sched_options) }; const struct option replay_options[] = { OPT_UINTEGER('r', "repeat", &sched.replay_repeat, @@ -1975,16 +1984,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 
"dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), - OPT_END() - }; - const struct option sched_options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() + OPT_PARENT(sched_options) }; const struct option map_options[] = { OPT_BOOLEAN(0, "compact", &sched.map.comp, @@ -1995,7 +1995,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "highlight given CPUs in map"), OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", "display given CPUs in map"), - OPT_END() + OPT_PARENT(sched_options) }; const char * const latency_usage[] = { "perf sched latency []", -- cgit v1.2.3 From 1208bb274ba547012579d5b68c693e0b36682b74 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Oct 2016 11:02:43 +0900 Subject: perf sched map: Apply cpu color when there's an activity Applying cpu color always doesn't help readability IMHO. Instead it might be better to applying the color when there's an activity on those CPUs. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024020246.14928-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 8ca1b5409289..a8ad85914466 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1479,7 +1479,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, cpu_color = COLOR_CPUS; if (cpu != this_cpu) - color_fprintf(stdout, cpu_color, " "); + color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); -- cgit v1.2.3 From e107f129e2e0e75ddf1cd7995a9f5ffff2307766 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Oct 2016 11:02:44 +0900 Subject: perf sched map: Always show task comm with -v I'd like to see the names of tasks with perf sched map, but it only shows the names of new tasks and uses short names from then on. This is not good for long running tasks since it's hard for users to track the short names. This patch makes it show the names (except for the idle task) when the -v option is used. We may eventually make this the default behavior. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024020246.14928-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a8ad85914466..1f33d15314a5 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1493,7 +1493,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, goto out; color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC); - if (new_shortname) { + if (new_shortname || (verbose && sched_in->tid)) { const char *pid_color = color; if (thread__has_color(sched_in)) -- cgit v1.2.3 From 99620a5d0cc8e2dd9aedb629a6e81825f0db020e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Oct 2016 11:02:45 +0900 Subject: perf tools: Introduce timestamp__scnprintf_usec() Joonwoo reported that there's a mismatch between timestamps in script and sched commands. This was because of difference in printing the timestamp. Factor out the code and share it so that they can be in sync. Also I found that sched map has similar problem, fix it too. Committer notes: Fixed the max_lat_at bug introduced by Namhyung's original patch, as pointed out by Joonwoo, and made it a function following the scnprintf() model, i.e. 
returning the number of bytes formatted, and receiving as the first parameter the object from where the data to the formatting is obtained, renaming it from: char *timestamp_in_usec(char *bf, size_t size, u64 timestamp) to int timestamp__scnprintf_usec(u64 timestamp, char *bf, size_t size) Reported-by: Joonwoo Park Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024020246.14928-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 10 +++++++--- tools/perf/builtin-script.c | 10 ++++++---- tools/perf/util/util.c | 8 ++++++++ tools/perf/util/util.h | 3 +++ 4 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1f33d15314a5..fb3441211e4b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1191,6 +1191,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ int i; int ret; u64 avg; + char max_lat_at[32]; if (!work_list->nb_atoms) return; @@ -1212,12 +1213,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ printf(" "); avg = work_list->total_lat / work_list->nb_atoms; + timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", (double)work_list->total_runtime / NSEC_PER_MSEC, work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, (double)work_list->max_lat / NSEC_PER_MSEC, - (double)work_list->max_lat_at / NSEC_PER_SEC); + max_lat_at); } static int pid_cmp(struct work_atoms *l, struct work_atoms *r) @@ -1402,6 +1404,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; + char stimestamp[32]; 
BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); @@ -1492,7 +1495,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) goto out; - color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC); + timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); + color_fprintf(stdout, color, " %12s secs ", stimestamp); if (new_shortname || (verbose && sched_in->tid)) { const char *pid_color = color; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 412fb6e65ac0..e1daff36d070 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -441,7 +441,6 @@ static void print_sample_start(struct perf_sample *sample, { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; - unsigned long usecs; unsigned long long nsecs; if (PRINT_FIELD(COMM)) { @@ -471,11 +470,14 @@ static void print_sample_start(struct perf_sample *sample, nsecs = sample->time; secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; - usecs = nsecs / NSEC_PER_USEC; + if (nanosecs) printf("%5lu.%09llu: ", secs, nsecs); - else - printf("%5lu.%06lu: ", secs, usecs); + else { + char sample_time[32]; + timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); + printf("%12s: ", sample_time); + } } } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 85c56800f17a..5bbd1f609f1f 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -433,6 +433,14 @@ int parse_nsec_time(const char *str, u64 *ptime) return 0; } +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) +{ + u64 sec = timestamp / NSEC_PER_SEC; + u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; + + return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); +} + unsigned long parse_tag_value(const char *str, struct parse_tag *tags) { struct parse_tag *i = tags; diff --git a/tools/perf/util/util.h 
b/tools/perf/util/util.h index 71b6992f1d98..79662d67891e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -362,4 +362,7 @@ extern int sched_getcpu(void); #endif int is_printable_array(char *p, unsigned int len); + +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); + #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 67bdc35fb48c97502dd4b9eeac561e4bcc18684b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 19 Oct 2016 11:45:23 -0700 Subject: perf list: Support matching by topic Add support in perf list topic to only show events belonging to a specific vendor events topic. For example the following works now: % perf list frontend List of pre-defined events (to be used in -e): stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] stalled-cycles-frontend OR cpu/stalled-cycles-frontend/ [Kernel PMU event] frontend: dsb2mite_switches.count [Decode Stream Buffer (DSB)-to-MITE switches] dsb2mite_switches.penalty_cycles [Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles] dsb_fill.exceed_dsb_lines [Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines] icache.hit [Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches] ... 
Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1476902724-9586-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 31b845ec32e2..dc6ccaa4e927 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1141,7 +1141,9 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (event_glob != NULL && !(strglobmatch_nocase(name, event_glob) || (!is_cpu && strglobmatch_nocase(alias->name, - event_glob)))) + event_glob)) || + (alias->topic && + strglobmatch_nocase(alias->topic, event_glob)))) continue; if (is_cpu && !name_only && !alias->desc) -- cgit v1.2.3 From e0c4758278e2452ad28149f620b81ce43b2df7b6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 16:39:21 -0300 Subject: perf bench mem: Ignore export.h related changes to mem{cpy,set}.S Ignore export.h and EXPORT_SYMBOL in: 784d5699eddc ("x86: move exports to actual definitions") We're not dragging this stuff, not useful in tools/ This silences the following warnings while building perf: Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel Warning: tools/arch/x86/lib/memset_64.S differs from kernel Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-h9vw3pe0fq79zmyqsfr0s0mo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 982d6439bb07..7de14f470f3c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -381,10 +381,10 @@ $(PERF_IN): prepare FORCE (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) 
\ || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ - (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ + (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true @(test -f ../../arch/x86/lib/memset_64.S && ( \ - (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ + (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ -- cgit v1.2.3 From 0fb75c8ce15be60e768c06adabf00ff79e5c2501 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:02:11 -0300 Subject: tools: Update asm-generic/mman-common.h copy from the kernel To get the defines introduced in the commit e8c24d3a23a4 ("x86/pkeys: Allocation/free syscalls") Silencing this perf build warning: Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel Need to change 'perf trace' to beautify those syscalls, as soon as booting with a kernel with it. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yev9rexu02cl7cjeozzmrl9t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/mman-common.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 58274382a616..8c27db0c5c08 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -72,4 +72,9 @@ #define MAP_HUGE_SHIFT 26 #define MAP_HUGE_MASK 0x3f +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + #endif /* __ASM_GENERIC_MMAN_COMMON_H */ -- cgit v1.2.3 From ca7202bffa32c6ff0d8abc088f77ff0bf3a5bead Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 16:57:34 -0300 Subject: perf tools: Update x86's syscall_64.tbl, adding pkey_(alloc,free,mprotect) Introduced in commit f9afc6197e9b ("x86: Wire up protection keys system calls") This will make 'perf trace' aware of them on x86_64. 
Cc: Adrian Hunter Cc: Dave Hansen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-s1ta2ttv2xacecqogmd3a9p1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e9ce9c7c39b4..e93ef0b38db8 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -335,6 +335,9 @@ 326 common copy_file_range sys_copy_file_range 327 64 preadv2 sys_preadv2 328 64 pwritev2 sys_pwritev2 +329 common pkey_mprotect sys_pkey_mprotect +330 common pkey_alloc sys_pkey_alloc +331 common pkey_free sys_pkey_free # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.3 From cf346d5bd4b9d61656df2f72565c9b354ef3ca0d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:20:47 -0300 Subject: perf scripting: Avoid leaking the scripting_context variable Both register_perl_scripting() and register_python_scripting() allocate this variable, fix it by checking if it already was. 
Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Zanussi Cc: Wang Nan Fixes: 7e4b21b84c43 ("perf/scripts: Add Python scripting engine") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-scripting.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 9df61059a85d..a2fd6e79d5a5 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -95,7 +95,8 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering py script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPYTHON @@ -159,7 +160,8 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering pl script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPERL -- cgit v1.2.3 From 9a8860bbaa936407aa95d7d3ef836036a117b207 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:30:05 -0300 Subject: perf scripting: Don't die if scripting can't be setup, disable it Removing one more set of die() calls. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6pyil685m5i2tugg56gcy0tg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-scripting.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index a2fd6e79d5a5..0ac9077f62a2 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -25,6 +25,7 @@ #include #include "../perf.h" +#include "debug.h" #include "util.h" #include "trace-event.h" @@ -86,17 +87,15 @@ struct scripting_ops python_scripting_unsupported_ops = { static void register_python_scripting(struct scripting_ops *scripting_ops) { - int err; - err = script_spec_register("Python", scripting_ops); - if (err) - die("error registering Python script extension"); - - err = script_spec_register("py", scripting_ops); - if (err) - die("error registering py script extension"); - if (scripting_context == NULL) scripting_context = malloc(sizeof(*scripting_context)); + + if (scripting_context == NULL || + script_spec_register("Python", scripting_ops) || + script_spec_register("py", scripting_ops)) { + pr_err("Error registering Python script extension: disabling it\n"); + zfree(&scripting_context); + } } #ifdef NO_LIBPYTHON @@ -151,17 +150,15 @@ struct scripting_ops perl_scripting_unsupported_ops = { static void register_perl_scripting(struct scripting_ops *scripting_ops) { - int err; - err = script_spec_register("Perl", scripting_ops); - if (err) - die("error registering Perl script extension"); - - err = script_spec_register("pl", scripting_ops); - if (err) - die("error registering pl script extension"); - if (scripting_context == NULL) scripting_context = malloc(sizeof(*scripting_context)); + + if (scripting_context == NULL || + script_spec_register("Perl", scripting_ops) || + 
script_spec_register("pl", scripting_ops)) { + pr_err("Error registering Perl script extension: disabling it\n"); + zfree(&scripting_context); + } } #ifdef NO_LIBPERL -- cgit v1.2.3 From 46cb25b1a0ee74bf4a79cfb3081ae3567b2f7135 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2016 19:02:35 -0200 Subject: perf tools: Add missing object file to the python binding linkage list In ac12f6764c50 ("perf tools: Implement branch_type event parameter") we started using the parse_branch_str() function from one of the files used in the python binding, which caused this entry in 'perf test' to fail: # perf test -v python 16: Try 'import perf' in python, checking link problems : --- start --- test child forked, pid 16667 Traceback (most recent call last): File "<string>", line 1, in <module> ImportError: /tmp/build/perf/python/perf.so: undefined symbol: parse_branch_str test child finished with -1 ---- end ---- Try 'import perf' in python, checking link problems: FAILED! # I must've committed some mistake when running 'perf test' to send the pull request for the perf-core-for-mingo-20161024 tag, to have let this regression pass, sigh. Just add tools/perf/util/parse-branch-options.c and switch from using ui__warning(), which is not available in the python binding, to pr_warning(), which is good enough for this case. 
Now: # perf test python 16: Try 'import perf' in python, checking link problems : Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Andi Kleen Fixes: ac12f6764c50 ("perf tools: Implement branch_type event parameter") Link: http://lkml.kernel.org/n/tip-9kn1ct1cx9ppwqlmzl6z0xhs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-branch-options.c | 2 +- tools/perf/util/python-ext-sources | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 3634d6974300..38fd11504015 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -64,7 +64,7 @@ int parse_branch_str(const char *str, __u64 *mode) } if (!br->name) { ret = -1; - ui__warning("unknown branch filter %s," + pr_warning("unknown branch filter %s," " check man page\n", s); goto error; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b7d4f4aeee61..0546a4304347 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,6 +18,7 @@ util/thread_map.c util/util.c util/xyarray.c util/cgroup.c +util/parse-branch-options.c util/rblist.c util/counts.c util/strlist.c -- cgit v1.2.3