diff options
author | Andi Kleen <ak@linux.intel.com> | 2019-11-21 01:15:20 +0100 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-11-29 16:20:45 +0100 |
commit | 4b49ab708d1804bc8b2fcdde79844b8bc98f7ef6 (patch) | |
tree | cb714fa37caf1dd17546d4512d14c3913ca19414 /tools | |
parent | perf stat: Use affinity for opening events (diff) | |
download | linux-4b49ab708d1804bc8b2fcdde79844b8bc98f7ef6.tar.xz linux-4b49ab708d1804bc8b2fcdde79844b8bc98f7ef6.zip |
perf stat: Use affinity for reading
Restructure event reading to use affinity to minimize the number of IPIs
needed.
Before on a large test case with 94 CPUs:
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
3.16 0.106079 4 22082 read
After:
3.43 0.081295 3 22082 read
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-stat.c | 97 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 1 |
2 files changed, 57 insertions, 41 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cf8516e701e2..a098c2ebf4ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu, * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct evsel *counter, struct timespec *rs) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); - int ncpus, cpu, thread; - - if (target__has_cpu(&target) && !target__has_per_thread(&target)) - ncpus = perf_evsel__nr_cpus(counter); - else - ncpus = 1; + int thread; if (!counter->supported) return -ENOENT; @@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - struct perf_counts_values *count; - - count = perf_counts(counter->counts, cpu, thread); - - /* - * The leader's group read loads data into its group members - * (via perf_evsel__read_counter) and sets threir count->loaded. - */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { - counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; - return -1; - } + struct perf_counts_values *count; - perf_counts__set_loaded(counter->counts, cpu, thread, false); + count = perf_counts(counter->counts, cpu, thread); - if (STAT_RECORD) { - if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { - pr_err("failed to write stat event\n"); - return -1; - } - } + /* + * The leader's group read loads data into its group members + * (via perf_evsel__read_counter()) and sets their count->loaded. + */ + if (!perf_counts__is_loaded(counter->counts, cpu, thread) && + read_single_counter(counter, cpu, thread, rs)) { + counter->counts->scaled = -1; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; + return -1; + } + + perf_counts__set_loaded(counter->counts, cpu, thread, false); - if (verbose > 1) { - fprintf(stat_config.output, - "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), - cpu, - count->val, count->ena, count->run); + if (STAT_RECORD) { + if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + pr_err("failed to write stat event\n"); + return -1; } } + + if (verbose > 1) { + fprintf(stat_config.output, + "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), + cpu, + count->val, count->ena, count->run); + } } return 0; @@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs) static void read_counters(struct timespec *rs) { struct evsel *counter; - int ret; + struct affinity affinity; + int i, ncpus, cpu; + + if (affinity__setup(&affinity) < 0) + return; + + ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); + if (!target__has_cpu(&target) || target__has_per_thread(&target)) + ncpus = 1; + evlist__for_each_cpu(evsel_list, i, cpu) { + if (i >= ncpus) + break; + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evsel_list, counter) { + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + counter->cpu_iter - 1); + } + } + } + affinity__cleanup(&affinity); evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter, rs); - if (ret) + if (counter->err) pr_debug("failed to read counter %s\n", counter->name); - - if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) + if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); + counter->err = 0; } } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ca82a93960cd..c8af4bc23f8f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -86,6 +86,7 @@ struct evsel { struct list_head config_terms; struct bpf_object *bpf_obj; int bpf_fd; + int err; bool auto_merge_stats; bool merged_stat; const char * metric_expr; |