summaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-12-06 17:20:04 +0100
committerIngo Molnar <mingo@kernel.org>2017-12-06 17:20:04 +0100
commitc32909741fe93032705e6da29b564b8fec84f415 (patch)
tree5f82c3f5f1c573f304feeff7c8c5428200e696dd /tools/perf
parentMerge branch 'perf/urgent' into perf/core, to pick up fixes (diff)
parentperf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record (diff)
downloadlinux-c32909741fe93032705e6da29b564b8fec84f415.tar.xz
linux-c32909741fe93032705e6da29b564b8fec84f415.zip
Merge tag 'perf-core-for-mingo-4.16-20171206' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Improve build messages for files needed by Intel-PT, originally copied from the kernel sources, that drifted from its original (Adrian Hunter) - Allow computing 'perf stat' style metrics in 'perf script' (Andi Kleen) - Fix 'perf report -D' output for user metadata events (Arnaldo Carvalho de Melo) - Add feature test for pthread_barrier_t availability (Arnaldo Carvalho de Melo) - Allow again using x86's asm.h when building for the 'bpf' clang target, making some 'perf test' LLVM/BPF entries work again (Arnaldo Carvalho de Melo) - Use cpumaps in 'perf bench futex', eliminating some code duplication (Davidlohr Bueso) - Improve PMU infrastructure to support amp64's ThunderX2 implementation defined core events (Ganapatrao Kulkarni) - Add hint about how to add USDT probes for Node.js (Hansuk Hong) - s/390 needs -fPIC to be incrementally linked or linked to shared libraries (Hendrik Brueckner) - Use pthread_barrier to synch 'perf bench futex wake-parallel' waker threads (James Yang) - Fix up build in hardened environments, such as Fedora 27 (Jiri Olsa) - Add a tip about cacheline events in 'perf c2c' (Sangwon Hong) - Set browser mode right before setup_browser(), because we may have errors printed before that, which were getting lost (Seokho Song) - s390x doesn't support PERF_TYPE_BREAKPOINT, so disable 'perf test' cases 19 and 20 on s390x, that tests that feature (Thomas Richter) - Fix unnecessary memory allocation for s390x 'perf annotate' objdump parsing, which could lead to thousands of needless entries in the instruction handling array (Thomas Richter) - Fix objdump comment parsing for Intel mov dissassembly (Thomas Richter) - Clarify usage of 'overwrite' and 'backward' in the evlist/mmap code, removing the 'overwrite' parameter from several functions as it was always used it as 'false' (Wang Nan) - Fix 'perf record' backward recording, it wasn't doing what was expected: overwriting records when the ring buffer gets full (Wang Nan) - Use more flexible pattern matching for CPU identification for perf vendor event's mapfile.csv, removing the need for a new perf binary for a sligthly different chip revision that shares the same set of counters (William Cohen) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-script.txt10
-rw-r--r--tools/perf/Documentation/tips.txt2
-rw-r--r--tools/perf/Makefile.config12
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/header.c65
-rw-r--r--tools/perf/arch/powerpc/util/header.c2
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c3
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/arch/x86/util/header.c2
-rw-r--r--tools/perf/bench/futex-hash.c19
-rw-r--r--tools/perf/bench/futex-lock-pi.c23
-rw-r--r--tools/perf/bench/futex-requeue.c22
-rw-r--r--tools/perf/bench/futex-wake-parallel.c46
-rw-r--r--tools/perf/bench/futex-wake.c18
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-record.c40
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-script.c97
-rw-r--r--tools/perf/builtin-stat.c62
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/builtin-trace.c2
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json62
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv15
-rw-r--r--tools/perf/pmu-events/arch/powerpc/mapfile.csv12
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv5
-rw-r--r--tools/perf/pmu-events/jevents.c39
-rw-r--r--tools/perf/tests/backward-ring-buffer.c6
-rw-r--r--tools/perf/tests/bp_signal.c2
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/perf-record.c2
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/tests/task-exit.c2
-rw-r--r--tools/perf/util/annotate.c8
-rw-r--r--tools/perf/util/evlist.c53
-rw-r--r--tools/perf/util/evlist.h8
-rw-r--r--tools/perf/util/header.c68
-rw-r--r--tools/perf/util/header.h8
-rw-r--r--tools/perf/util/intel-pt-decoder/Build24
-rw-r--r--tools/perf/util/metricgroup.c8
-rw-r--r--tools/perf/util/mmap.c73
-rw-r--r--tools/perf/util/mmap.h4
-rw-r--r--tools/perf/util/pmu.c87
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/python.c2
-rw-r--r--tools/perf/util/rblist.c19
-rw-r--r--tools/perf/util/rblist.h1
-rw-r--r--tools/perf/util/session.c3
-rw-r--r--tools/perf/util/stat-shadow.c12
-rw-r--r--tools/perf/util/thread_map.c22
-rw-r--r--tools/perf/util/thread_map.h1
56 files changed, 722 insertions, 287 deletions
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2811fcf684cb..974ceb12c7f3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
- brstackoff, callindent, insn, insnlen, synth, phys_addr.
+ brstackoff, callindent, insn, insnlen, synth, phys_addr, metric.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -217,6 +217,14 @@ OPTIONS
The brstackoff field will print an offset into a specific dso/binary.
+ With the metric option perf script can compute metrics for
+ sampling periods, similar to perf stat. This requires
+ specifying a group with multiple metrics with the :S option
+ for perf record. perf will sample on the first event, and
+ compute metrics for all the events in the group. Please note
+ that the metric computed is averaged over the whole sampling
+ period, not just for the sample point.
+
-k::
--vmlinux=<file>::
vmlinux pathname
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index db0ca3063eae..849599f39c5e 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -32,3 +32,5 @@ Order by the overhead of source file name and line number: perf report -s srclin
System-wide collection from all CPUs: perf record -a
Show current config key-value pairs: perf config --list
Show user configuration overrides: perf config --user --list
+To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
+To report cacheline events from previous recording: perf c2c report
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index f6786fa2419f..808066c823f7 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -41,6 +41,7 @@ ifeq ($(SRCARCH),x86)
LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
endif
NO_PERF_REGS := 0
+ CFLAGS += -fPIC
endif
ifeq ($(SRCARCH),arm)
@@ -184,9 +185,7 @@ ifdef PYTHON_CONFIG
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
- ifeq ($(CC_NO_CLANG), 1)
- PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
- endif
+ PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
endif
@@ -263,6 +262,10 @@ ifeq ($(feature-pthread-attr-setaffinity-np), 1)
CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
endif
+ifeq ($(feature-pthread-barrier), 1)
+ CFLAGS += -DHAVE_PTHREAD_BARRIER
+endif
+
ifndef NO_BIONIC
$(call feature_check,bionic)
ifeq ($(feature-bionic), 1)
@@ -572,7 +575,6 @@ ifndef NO_GTK2
endif
endif
-
ifdef NO_LIBPERL
CFLAGS += -DNO_LIBPERL
else
@@ -580,6 +582,8 @@ else
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
ifneq ($(feature-libperl), 1)
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index cef6fb38d17e..b1ab72d2a42e 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,3 +1,4 @@
+libperf-y += header.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
new file mode 100644
index 000000000000..534cd2507d83
--- /dev/null
+++ b/tools/perf/arch/arm64/util/header.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define MIDR "/regs/identification/midr_el1"
+#define MIDR_SIZE 19
+#define MIDR_REVISION_MASK 0xf
+#define MIDR_VARIANT_SHIFT 20
+#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ char *buf = NULL;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ int cpu;
+ u64 midr = 0;
+ struct cpu_map *cpus;
+ FILE *file;
+
+ if (!sysfs || !pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(MIDR_SIZE);
+ if (!buf)
+ return NULL;
+
+ /* read midr from list of cpus mapped to this pmu */
+ cpus = cpu_map__get(pmu->cpus);
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
+ sysfs, cpus->map[cpu]);
+
+ file = fopen(path, "r");
+ if (!file) {
+ pr_debug("fopen failed for file %s\n", path);
+ continue;
+ }
+
+ if (!fgets(buf, MIDR_SIZE, file)) {
+ fclose(file);
+ continue;
+ }
+ fclose(file);
+
+ /* Ignore/clear Variant[23:20] and
+ * Revision[3:0] of MIDR
+ */
+ midr = strtoul(buf, NULL, 16);
+ midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
+ scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
+ /* got midr break loop */
+ break;
+ }
+
+ if (!midr) {
+ pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
+ free(buf);
+ buf = NULL;
+ }
+
+ cpu_map__put(cpus);
+ return buf;
+}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 7a4cf80c207a..0b242664f5ea 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -35,7 +35,7 @@ get_cpuid(char *buffer, size_t sz)
}
char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *bufp;
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index e0e466c650df..8c72b44444cb 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -18,7 +18,8 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
if (!strcmp(name, "br"))
ops = &ret_ops;
- arch__associate_ins_ops(arch, name, ops);
+ if (ops)
+ arch__associate_ins_ops(arch, name, ops);
return ops;
}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index b59678e8c1e2..06abe8108b33 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -84,7 +84,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
CHECK__(perf_evlist__open(evlist));
- CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
pc = evlist->mmap[0].base;
ret = perf_read_tsc_conversion(pc, &tc);
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 33027c5e6f92..b626d2bad9f1 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -66,7 +66,7 @@ get_cpuid(char *buffer, size_t sz)
}
char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *buf = malloc(128);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 58ae6ed8f38b..2defb6df7fd0 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -24,6 +24,7 @@
#include <subcmd/parse-options.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <sys/time.h>
@@ -118,11 +119,12 @@ static void print_summary(void)
int bench_futex_hash(int argc, const char **argv)
{
int ret = 0;
- cpu_set_t cpu;
+ cpu_set_t cpuset;
struct sigaction act;
- unsigned int i, ncpus;
+ unsigned int i;
pthread_attr_t thread_attr;
struct worker *worker = NULL;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
if (argc) {
@@ -130,14 +132,16 @@ int bench_futex_hash(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ goto errmem;
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads) /* default to the number of CPUs */
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -163,10 +167,10 @@ int bench_futex_hash(int argc, const char **argv)
if (!worker[i].futex)
goto errmem;
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -217,6 +221,7 @@ int bench_futex_hash(int argc, const char **argv)
print_summary();
free(worker);
+ free(cpu);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 08653ae8a8c4..8e9c4753e304 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -15,6 +15,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -32,7 +33,7 @@ static struct worker *worker;
static unsigned int nsecs = 10;
static bool silent = false, multi = false;
static bool done = false, fshared = false;
-static unsigned int ncpus, nthreads = 0;
+static unsigned int nthreads = 0;
static int futex_flag = 0;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
@@ -113,9 +114,10 @@ static void *workerfn(void *arg)
return NULL;
}
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
@@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
} else
worker[i].futex = &global_futex;
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
@@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv)
unsigned int i;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
if (argc)
goto err;
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_attr_init(&thread_attr);
gettimeofday(&start, NULL);
- create_threads(worker, thread_attr);
+ create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 1058c194608a..fc692efa0c05 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -22,6 +22,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
static int futex_flag = 0;
static const struct option options[] = {
@@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused)
}
static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr)
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
/* create and block all threads */
for (i = 0; i < nthreads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv)
unsigned int i, j;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
if (argc)
goto err;
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "cpu_map__new");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr);
+ block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index b4732dad9f89..69d8fdc87315 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -7,7 +7,17 @@
* for each individual thread to service its share of work. Ultimately
* it can be used to measure futex_wake() changes.
*/
+#include "bench.h"
+#include <linux/compiler.h>
+#include "../util/debug.h"
+#ifndef HAVE_PTHREAD_BARRIER
+int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+ pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+ return 0;
+}
+#else /* HAVE_PTHREAD_BARRIER */
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
@@ -15,12 +25,11 @@
#include <signal.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
-#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/time64.h>
#include <errno.h>
-#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -42,8 +51,9 @@ static bool done = false, silent = false, fshared = false;
static unsigned int nblocked_threads = 0, nwaking_threads = 0;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
+static pthread_barrier_t barrier;
static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting;
+static unsigned int threads_starting;
static int futex_flag = 0;
static const struct option options[] = {
@@ -64,6 +74,8 @@ static void *waking_workerfn(void *arg)
struct thread_data *waker = (struct thread_data *) arg;
struct timeval start, end;
+ pthread_barrier_wait(&barrier);
+
gettimeofday(&start, NULL);
waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
@@ -84,6 +96,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+ pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
/* create and block all threads */
for (i = 0; i < nwaking_threads; i++) {
/*
@@ -96,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_barrier_wait(&barrier);
+
for (i = 0; i < nwaking_threads; i++)
if (pthread_join(td[i].worker, NULL))
err(EXIT_FAILURE, "pthread_join");
+
+ pthread_barrier_destroy(&barrier);
}
static void *blocked_workerfn(void *arg __maybe_unused)
@@ -119,19 +137,20 @@ static void *blocked_workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nblocked_threads;
/* create and block all threads */
for (i = 0; i < nblocked_threads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
@@ -205,6 +224,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
struct sigaction act;
pthread_attr_t thread_attr;
struct thread_data *waking_worker;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options,
bench_futex_wake_parallel_usage, 0);
@@ -217,9 +237,12 @@ int bench_futex_wake_parallel(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
+
if (!nblocked_threads)
- nblocked_threads = ncpus;
+ nblocked_threads = cpu->nr;
/* some sanity checks */
if (nwaking_threads > nblocked_threads || !nwaking_threads)
@@ -259,7 +282,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
/* create, launch & block all threads */
- block_threads(blocked_worker, thread_attr);
+ block_threads(blocked_worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
@@ -297,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv)
free(blocked_worker);
return ret;
}
+#endif /* HAVE_PTHREAD_BARRIER */
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 8c5c0b6b5c97..e8181ad7d088 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -22,6 +22,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -89,19 +90,19 @@ static void print_summary(void)
}
static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr)
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
/* create and block all threads */
for (i = 0; i < nthreads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
unsigned int i, j;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
if (argc) {
@@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
@@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr);
+ block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 597c7de9bec9..98853162eae9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1044,7 +1044,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
goto out;
}
- if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
ui__error("Failed to mmap the events: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
perf_evlist__close(evlist);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 003255910c05..0a5749ef8b94 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -301,7 +301,7 @@ static int record__mmap_evlist(struct record *rec,
struct record_opts *opts = &rec->opts;
char msg[512];
- if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+ if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode) < 0) {
if (errno == EPERM) {
@@ -372,6 +372,8 @@ try_again:
ui__error("%s\n", msg);
goto out;
}
+
+ pos->supported = true;
}
if (perf_evlist__apply_filters(evlist, &pos)) {
@@ -477,7 +479,7 @@ static struct perf_event_header finished_round_event = {
};
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool backward)
+ bool overwrite)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -487,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (!evlist)
return 0;
- maps = backward ? evlist->backward_mmap : evlist->mmap;
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
if (!maps)
return 0;
- if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
for (i = 0; i < evlist->nr_mmaps; i++) {
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
if (maps[i].base) {
- if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
@@ -518,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (bytes_written != rec->bytes_written)
rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
- if (backward)
+ if (overwrite)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
return rc;
@@ -690,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist)
if (evlist) {
if (evlist->mmap && evlist->mmap[0].base)
return evlist->mmap[0].base;
- if (evlist->backward_mmap && evlist->backward_mmap[0].base)
- return evlist->backward_mmap[0].base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+ return evlist->overwrite_mmap[0].base;
}
return NULL;
}
@@ -784,6 +786,28 @@ static int record__synthesize(struct record *rec, bool tail)
perf_event__synthesize_guest_os, tool);
}
+ err = perf_event__synthesize_extra_attr(&rec->tool,
+ rec->evlist,
+ process_synthesized_event,
+ data->is_pipe);
+ if (err)
+ goto out;
+
+ err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize cpu map.\n");
+ return err;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
opts->proc_map_timeout, 1);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index af5dd038195e..eb9ce6327e71 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -921,13 +921,6 @@ int cmd_report(int argc, const char **argv)
return -EINVAL;
}
- if (report.use_stdio)
- use_browser = 0;
- else if (report.use_tui)
- use_browser = 1;
- else if (report.use_gtk)
- use_browser = 2;
-
if (report.inverted_callchain)
callchain_param.order = ORDER_CALLER;
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
@@ -1014,6 +1007,13 @@ repeat:
perf_hpp_list.need_collapse = true;
}
+ if (report.use_stdio)
+ use_browser = 0;
+ else if (report.use_tui)
+ use_browser = 1;
+ else if (report.use_gtk)
+ use_browser = 2;
+
/* Force tty output for header output and per-thread stat. */
if (report.header || report.header_only || report.show_threads)
use_browser = 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ee7c7aaaae72..39d8b55f0db3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/stat.h"
+#include "util/color.h"
#include "util/string2.h"
#include "util/thread-stack.h"
#include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
PERF_OUTPUT_SYNTH = 1U << 25,
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
PERF_OUTPUT_UREGS = 1U << 27,
+ PERF_OUTPUT_METRIC = 1U << 28,
};
struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
{.str = "synth", .field = PERF_OUTPUT_SYNTH},
{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+ {.str = "metric", .field = PERF_OUTPUT_METRIC},
};
enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
char *filename;
FILE *fp;
u64 samples;
+ /* For metric output */
+ u64 val;
+ int gnum;
};
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+ return (struct perf_evsel_script *)evsel->priv;
+}
+
static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
struct perf_data *data)
{
- struct perf_evsel_script *es = malloc(sizeof(*es));
+ struct perf_evsel_script *es = zalloc(sizeof(*es));
if (es != NULL) {
if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
es->fp = fopen(es->filename, "w");
if (es->fp == NULL)
goto out_free_filename;
- es->samples = 0;
}
return es;
@@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
return fprintf(fp, "%-*s", maxlen, out);
}
+struct metric_ctx {
+ struct perf_sample *sample;
+ struct thread *thread;
+ struct perf_evsel *evsel;
+ FILE *fp;
+};
+
+static void script_print_metric(void *ctx, const char *color,
+ const char *fmt,
+ const char *unit, double val)
+{
+ struct metric_ctx *mctx = ctx;
+
+ if (!fmt)
+ return;
+ perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ mctx->fp);
+ fputs("\tmetric: ", mctx->fp);
+ if (color)
+ color_fprintf(mctx->fp, color, fmt, val);
+ else
+ printf(fmt, val);
+ fprintf(mctx->fp, " %s\n", unit);
+}
+
+static void script_new_line(void *ctx)
+{
+ struct metric_ctx *mctx = ctx;
+
+ perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ mctx->fp);
+ fputs("\tmetric: ", mctx->fp);
+}
+
+static void perf_sample__fprint_metric(struct perf_script *script,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ FILE *fp)
+{
+ struct perf_stat_output_ctx ctx = {
+ .print_metric = script_print_metric,
+ .new_line = script_new_line,
+ .ctx = &(struct metric_ctx) {
+ .sample = sample,
+ .thread = thread,
+ .evsel = evsel,
+ .fp = fp,
+ },
+ .force_header = false,
+ };
+ struct perf_evsel *ev2;
+ static bool init;
+ u64 val;
+
+ if (!init) {
+ perf_stat__init_shadow_stats();
+ init = true;
+ }
+ if (!evsel->stats)
+ perf_evlist__alloc_stats(script->session->evlist, false);
+ if (evsel_script(evsel->leader)->gnum++ == 0)
+ perf_stat__reset_shadow_stats();
+ val = sample->period * evsel->scale;
+ perf_stat__update_shadow_stats(evsel,
+ val,
+ sample->cpu);
+ evsel_script(evsel)->val = val;
+ if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
+ for_each_group_member (ev2, evsel->leader) {
+ perf_stat__print_shadow_stats(ev2,
+ evsel_script(ev2)->val,
+ sample->cpu,
+ &ctx,
+ NULL);
+ }
+ evsel_script(evsel->leader)->gnum = 0;
+ }
+}
+
static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al,
@@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
fprintf(fp, "\n");
+
+ if (PRINT_FIELD(METRIC))
+ perf_sample__fprint_metric(script, thread, evsel, sample, fp);
}
static struct scripting_ops *scripting_ops;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 59af5a8419e2..a027b4712e48 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -458,19 +458,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool has_unit(struct perf_evsel *counter)
-{
- return counter->unit && *counter->unit;
-}
-
-static bool has_scale(struct perf_evsel *counter)
-{
- return counter->scale != 1;
-}
-
static int perf_stat_synthesize_config(bool is_pipe)
{
- struct perf_evsel *counter;
int err;
if (is_pipe) {
@@ -482,53 +471,10 @@ static int perf_stat_synthesize_config(bool is_pipe)
}
}
- /*
- * Synthesize other events stuff not carried within
- * attr event - unit, scale, name
- */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->supported)
- continue;
-
- /*
- * Synthesize unit and scale only if it's defined.
- */
- if (has_unit(counter)) {
- err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel unit.\n");
- return err;
- }
- }
-
- if (has_scale(counter)) {
- err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel scale.\n");
- return err;
- }
- }
-
- if (counter->own_cpus) {
- err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel scale.\n");
- return err;
- }
- }
-
- /*
- * Name is needed only for pipe output,
- * perf.data carries event names.
- */
- if (is_pipe) {
- err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel name.\n");
- return err;
- }
- }
- }
+ err = perf_event__synthesize_extra_attr(NULL,
+ evsel_list,
+ process_synthesized_event,
+ is_pipe);
err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
process_synthesized_event,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 0077724fb24f..540461f5e345 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -907,7 +907,7 @@ try_again:
}
}
- if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
ui__error("Failed to mmap with %d (%s)\n",
errno, str_error_r(errno, msg, sizeof(msg)));
goto out_err;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 84debdbad327..7c57898095ea 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2437,7 +2437,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_apply_filters;
- err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
if (err < 0)
goto out_error_mmap;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 77406d25e521..e66a8a7bcced 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -45,7 +45,6 @@ include/uapi/asm-generic/mman-common.h
check () {
file=$1
- opts="--ignore-blank-lines --ignore-space-change"
shift
while [ -n "$*" ]; do
diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json
new file mode 100644
index 000000000000..2db45c40ebc7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json
@@ -0,0 +1,62 @@
+[
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, read",
+ "EventCode": "0x40",
+ "EventName": "l1d_cache_rd",
+ "BriefDescription": "L1D cache read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, write ",
+ "EventCode": "0x41",
+ "EventName": "l1d_cache_wr",
+ "BriefDescription": "L1D cache write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, read",
+ "EventCode": "0x42",
+ "EventName": "l1d_cache_refill_rd",
+ "BriefDescription": "L1D cache refill read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, write",
+ "EventCode": "0x43",
+ "EventName": "l1d_cache_refill_wr",
+ "BriefDescription": "L1D refill write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, read",
+ "EventCode": "0x4C",
+ "EventName": "l1d_tlb_refill_rd",
+ "BriefDescription": "L1D tlb refill read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, write",
+ "EventCode": "0x4D",
+ "EventName": "l1d_tlb_refill_wr",
+ "BriefDescription": "L1D tlb refill write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, read",
+ "EventCode": "0x4E",
+ "EventName": "l1d_tlb_rd",
+ "BriefDescription": "L1D tlb read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, write",
+ "EventCode": "0x4F",
+ "EventName": "l1d_tlb_wr",
+ "BriefDescription": "L1D tlb write",
+ },
+ {
+ "PublicDescription": "Bus access read",
+ "EventCode": "0x60",
+ "EventName": "bus_access_rd",
+ "BriefDescription": "Bus access read",
+ },
+ {
+ "PublicDescription": "Bus access write",
+ "EventCode": "0x61",
+ "EventName": "bus_access_wr",
+ "BriefDescription": "Bus access write",
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 000000000000..219d6756134e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,15 @@
+# Format:
+# MIDR,Version,JSON/file/pathname,Type
+#
+# where
+# MIDR Processor version
+# Variant[23:20] and Revision [3:0] should be zero.
+# Version could be used to track version of of JSON file
+# but currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/arm64/.
+# Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x00000000420f5160,v1,cavium,core
diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
index a0f3a11ca19f..229150e7ab7d 100644
--- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv
+++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
@@ -13,13 +13,5 @@
#
# Power8 entries
-004b0000,1,power8,core
-004b0201,1,power8,core
-004c0000,1,power8,core
-004d0000,1,power8,core
-004d0100,1,power8,core
-004d0200,1,power8,core
-004c0100,1,power8,core
-004e0100,1,power9,core
-004e0200,1,power9,core
-004e1200,1,power9,core
+004[bcd][[:xdigit:]]{4},1,power8,core
+004e[[:xdigit:]]{4},1,power9,core
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index fe1a2c47cabf..93656f2fd53a 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -23,10 +23,7 @@ GenuineIntel-6-1E,v2,nehalemep,core
GenuineIntel-6-1F,v2,nehalemep,core
GenuineIntel-6-1A,v2,nehalemep,core
GenuineIntel-6-2E,v2,nehalemex,core
-GenuineIntel-6-4E,v24,skylake,core
-GenuineIntel-6-5E,v24,skylake,core
-GenuineIntel-6-8E,v24,skylake,core
-GenuineIntel-6-9E,v24,skylake,core
+GenuineIntel-6-[4589]E,v24,skylake,core
GenuineIntel-6-37,v13,silvermont,core
GenuineIntel-6-4D,v13,silvermont,core
GenuineIntel-6-4C,v13,silvermont,core
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 9eb7047bafe4..b578aa26e375 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -116,6 +116,43 @@ static void fixdesc(char *s)
*e = 0;
}
+/* Add escapes for '\' so they are proper C strings. */
+static char *fixregex(char *s)
+{
+ int len = 0;
+ int esc_count = 0;
+ char *fixed = NULL;
+ char *p, *q;
+
+ /* Count the number of '\' in string */
+ for (p = s; *p; p++) {
+ ++len;
+ if (*p == '\\')
+ ++esc_count;
+ }
+
+ if (esc_count == 0)
+ return s;
+
+ /* allocate space for a new string */
+ fixed = (char *) malloc(len + 1);
+ if (!fixed)
+ return NULL;
+
+ /* copy over the characters */
+ q = fixed;
+ for (p = s; *p; p++) {
+ if (*p == '\\') {
+ *q = '\\';
+ ++q;
+ }
+ *q = *p;
+ ++q;
+ }
+ *q = '\0';
+ return fixed;
+}
+
static struct msrmap {
const char *num;
const char *pname;
@@ -648,7 +685,7 @@ static int process_mapfile(FILE *outfp, char *fpath)
}
line[strlen(line)-1] = '\0';
- cpuid = strtok_r(p, ",", &save);
+ cpuid = fixregex(strtok_r(p, ",", &save));
version = strtok_r(NULL, ",", &save);
fname = strtok_r(NULL, ",", &save);
type = strtok_r(NULL, ",", &save);
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 43a8c6ac4070..4035d43523c3 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
- perf_mmap__read_catchup(&evlist->backward_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) {
+ perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
+ while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
const u32 type = event->header.type;
switch (type) {
@@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages,
int err;
char sbuf[STRERR_BUFSIZE];
- err = perf_evlist__mmap(evlist, mmap_pages, false);
+ err = perf_evlist__mmap(evlist, mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index 335b695f4970..a467615c5a0e 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -296,7 +296,7 @@ bool test__bp_signal_is_supported(void)
* instruction breakpoint using the perf event interface.
* Once it's there we can release this.
*/
-#ifdef __powerpc__
+#if defined(__powerpc__) || defined(__s390x__)
return false;
#else
return true;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 34c22cdf4d5d..c433dd30975a 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -167,7 +167,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index fcc8984bc329..3bf7b145b826 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -639,7 +639,7 @@ static int do_test_code_reading(bool try_kcore)
break;
}
- ret = perf_evlist__mmap(evlist, UINT_MAX, false);
+ ret = perf_evlist__mmap(evlist, UINT_MAX);
if (ret < 0) {
pr_debug("perf_evlist__mmap failed\n");
goto out_put;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 842d33637a18..c46530918938 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -95,7 +95,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
goto out_err;
}
- CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
/*
* First, test that a 'comm' event can be found when the event is
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 91f10d6d9ae2..c0e971da965c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
expected_nr_events[i] = 1 + rand() % 127;
}
- if (perf_evlist__mmap(evlist, 128, false) < 0) {
+ if (perf_evlist__mmap(evlist, 128) < 0) {
pr_debug("failed to mmap events: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_delete_evlist;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index d9619d265314..97c9407d02a0 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -64,7 +64,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, UINT_MAX, false);
+ err = perf_evlist__mmap(evlist, UINT_MAX);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index c34904d37705..0afafab85238 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -141,7 +141,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
* fds in the same CPU to be injected in the same mmap ring buffer
* (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
*/
- err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index c6937ed12e6b..f6c72f915d48 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, 128, false);
+ err = perf_evlist__mmap(evlist, 128);
if (err < 0) {
pr_debug("failed to mmap event: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 7d3f4bf9534f..33e00295a972 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -449,7 +449,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
goto out;
}
- err = perf_evlist__mmap(evlist, UINT_MAX, false);
+ err = perf_evlist__mmap(evlist, UINT_MAX);
if (err) {
pr_debug("perf_evlist__mmap failed!\n");
goto out_err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 5d06ac81f7f1..01b62b81751b 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -101,7 +101,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
goto out_delete_evlist;
}
- if (perf_evlist__mmap(evlist, 128, false) < 0) {
+ if (perf_evlist__mmap(evlist, 128) < 0) {
pr_debug("failed to mmap events: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_delete_evlist;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 22ea7936d92f..facad1e279a8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -322,6 +322,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
return 0;
*addrp = strtoull(comment, &endptr, 16);
+ if (endptr == comment)
+ return 0;
name = strchr(endptr, '<');
if (name == NULL)
return -1;
@@ -435,8 +437,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m
return 0;
comment = ltrim(comment);
- comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name);
- comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+ comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
return 0;
@@ -480,7 +482,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
return 0;
comment = ltrim(comment);
- comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
return 0;
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 199bb82efbcd..3570355bcf39 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
fdarray__exit(&evlist->pollfd);
}
@@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
int i;
- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return 0;
for (i = 0; i < evlist->nr_mmaps; i++) {
- int fd = evlist->backward_mmap[i].fd;
+ int fd = evlist->overwrite_mmap[i].fd;
int err;
if (fd < 0)
@@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
* No need for read-write ring buffer: kernel stop outputting when
* it hit md->prev (perf_mmap__consume()).
*/
- return perf_mmap__read_forward(md, evlist->overwrite);
+ return perf_mmap__read_forward(md);
}
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
@@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
- perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
+ perf_mmap__consume(&evlist->mmap[idx], false);
}
static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
@@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap[i]);
- if (evlist->backward_mmap)
+ if (evlist->overwrite_mmap)
for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->backward_mmap[i]);
+ perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}
void perf_evlist__munmap(struct perf_evlist *evlist)
{
perf_evlist__munmap_nofree(evlist);
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
}
static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
@@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu_idx,
- int thread, int *_output, int *_output_backward)
+ int thread, int *_output, int *_output_overwrite)
{
struct perf_evsel *evsel;
int revent;
@@ -812,18 +812,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
int fd;
int cpu;
+ mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
- output = _output_backward;
- maps = evlist->backward_mmap;
+ output = _output_overwrite;
+ maps = evlist->overwrite_mmap;
if (!maps) {
maps = perf_evlist__alloc_mmap(evlist);
if (!maps)
return -1;
- evlist->backward_mmap = maps;
+ evlist->overwrite_mmap = maps;
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
+ mp->prot &= ~PROT_WRITE;
}
if (evsel->system_wide && thread)
@@ -884,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
true);
for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output, &output_backward))
+ thread, &output, &output_overwrite))
goto out_unmap;
}
}
@@ -912,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
false);
if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output, &output_backward))
+ &output, &output_overwrite))
goto out_unmap;
}
@@ -1052,15 +1054,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
* Return: %0 on success, negative error code otherwise.
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite, unsigned int auxtrace_pages,
+ unsigned int auxtrace_pages,
bool auxtrace_overwrite)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
const struct thread_map *threads = evlist->threads;
- struct mmap_params mp = {
- .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
- };
+ /*
+ * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+ * Its value is decided by evsel's write_backward.
+ * So &mp should not be passed through const pointer.
+ */
+ struct mmap_params mp;
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist);
@@ -1070,7 +1075,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- evlist->overwrite = overwrite;
evlist->mmap_len = perf_evlist__mmap_size(pages);
pr_debug("mmap size %zuB\n", evlist->mmap_len);
mp.mask = evlist->mmap_len - page_size - 1;
@@ -1091,10 +1095,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
return perf_evlist__mmap_per_cpu(evlist, &mp);
}
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite)
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
@@ -1750,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
RESUME,
} action = NONE;
- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return;
switch (old_state) {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 4e8131dacbd7..75160666d305 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -31,7 +31,6 @@ struct perf_evlist {
int nr_entries;
int nr_groups;
int nr_mmaps;
- bool overwrite;
bool enabled;
bool has_user_cpus;
size_t mmap_len;
@@ -45,7 +44,7 @@ struct perf_evlist {
} workload;
struct fdarray pollfd;
struct perf_mmap *mmap;
- struct perf_mmap *backward_mmap;
+ struct perf_mmap *overwrite_mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
@@ -169,10 +168,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite, unsigned int auxtrace_pages,
+ unsigned int auxtrace_pages,
bool auxtrace_overwrite);
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite);
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
size_t perf_evlist__mmap_size(unsigned long pages);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7c0e9d587bfa..5890e08e0754 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3258,6 +3258,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,
return err;
}
+static bool has_unit(struct perf_evsel *counter)
+{
+ return counter->unit && *counter->unit;
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+ return counter->scale != 1;
+}
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+ struct perf_evlist *evsel_list,
+ perf_event__handler_t process,
+ bool is_pipe)
+{
+ struct perf_evsel *counter;
+ int err;
+
+ /*
+ * Synthesize other events stuff not carried within
+ * attr event - unit, scale, name
+ */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->supported)
+ continue;
+
+ /*
+ * Synthesize unit and scale only if it's defined.
+ */
+ if (has_unit(counter)) {
+ err = perf_event__synthesize_event_update_unit(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel unit.\n");
+ return err;
+ }
+ }
+
+ if (has_scale(counter)) {
+ err = perf_event__synthesize_event_update_scale(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel counter.\n");
+ return err;
+ }
+ }
+
+ if (counter->own_cpus) {
+ err = perf_event__synthesize_event_update_cpus(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel cpus.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Name is needed only for pipe output,
+ * perf.data carries event names.
+ */
+ if (is_pipe) {
+ err = perf_event__synthesize_event_update_name(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel name.\n");
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_evlist **pevlist)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 29ccbfdf8724..317fb901e47f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include "event.h"
#include "env.h"
+#include "pmu.h"
enum {
HEADER_RESERVED = 0, /* always cleared */
@@ -107,6 +108,11 @@ int perf_event__synthesize_features(struct perf_tool *tool,
struct perf_evlist *evlist,
perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+ struct perf_evlist *evsel_list,
+ perf_event__handler_t process,
+ bool is_pipe);
+
int perf_event__process_feature(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session);
@@ -166,5 +172,5 @@ int write_padded(struct feat_fd *fd, const void *bf,
*/
int get_cpuid(char *buffer, size_t sz);
-char *get_cpuid_str(void);
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 10e0814bb8d2..1b704fbea9de 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
@(diff -I 2>&1 | grep -q 'option requires an argument' && \
- test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \
- diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
- diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
- diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
- || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true
+ test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 0ddd9c199227..e48410c99b39 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct metric_event me = {
.evsel = evsel
};
+
+ if (!metric_events)
+ return NULL;
+
nd = rblist__find(metric_events, &me);
if (nd)
return container_of(nd, struct metric_event, nd);
@@ -270,7 +274,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
bool raw)
{
- struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
int i;
struct rblist groups;
@@ -368,7 +372,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
- struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
int ret = -EINVAL;
int i, j;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 9fe5f9c7d577..05076e683938 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup,
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
u64 start, u64 end, u64 *prev)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
int diff = end - start;
- if (check_messup) {
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the 'end', we got messed up.
- *
- * In either case, truncate and restart at 'end'.
- */
- if (diff > map->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * 'end' points to a known good entry, start there.
- */
- start = end;
- diff = 0;
- }
- }
-
if (diff >= (int)sizeof(event->header)) {
size_t size;
@@ -89,7 +69,7 @@ broken_event:
return event;
}
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup)
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
{
u64 head;
u64 old = map->prev;
@@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess
head = perf_mmap__read_head(map);
- return perf_mmap__read(map, check_messup, old, head, &map->prev);
+ return perf_mmap__read(map, old, head, &map->prev);
}
union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
@@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
else
end = head + map->mask + 1;
- return perf_mmap__read(map, false, start, end, &map->prev);
+ return perf_mmap__read(map, start, end, &map->prev);
}
void perf_mmap__read_catchup(struct perf_mmap *map)
@@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
return 0;
}
-static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
+static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
struct perf_event_header *pheader;
u64 evt_head = head;
int size = mask + 1;
- pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
+ pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finished reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
pheader = (struct perf_event_header *)(buf + (evt_head & mask));
if (pheader->size == 0) {
- pr_debug("Finished reading backward ring buffer: get start\n");
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}
-static int rb_find_range(void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end, bool backward)
-{
- if (!backward) {
- *start = old;
- *end = head;
- return 0;
- }
-
- return backward_rb_find_range(data, mask, head, start, end);
-}
-
-int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
void *to, int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
@@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
void *buf;
int rc = 0;
- if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
- return -1;
+ start = overwrite ? head : old;
+ end = overwrite ? old : head;
if (start == end)
return 0;
size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+ if (!overwrite) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
- md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
- return 0;
+ md->prev = head;
+ perf_mmap__consume(md, overwrite);
+ return 0;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
+ return -1;
}
if ((start & md->mask) + size != (end & md->mask)) {
@@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
}
md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
+ perf_mmap__consume(md, overwrite);
out:
return rc;
}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index efd78b827b05..d640273b7762 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -86,10 +86,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
pc->data_tail = tail;
}
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
-int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 80fb1593913a..57e38fdf0b34 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,7 @@
#include <dirent.h>
#include <api/fs/fs.h>
#include <locale.h>
+#include <regex.h>
#include "util.h"
#include "pmu.h"
#include "parse-events.h"
@@ -537,17 +538,45 @@ static bool pmu_is_uncore(const char *name)
}
/*
+ * PMU CORE devices have different name other than cpu in sysfs on some
+ * platforms. looking for possible sysfs files to identify as core device.
+ */
+static int is_pmu_core(const char *name)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return 0;
+
+ /* Look for cpu sysfs (x86 and others) */
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs);
+ if ((stat(path, &st) == 0) &&
+ (strncmp(name, "cpu", strlen("cpu")) == 0))
+ return 1;
+
+ /* Look for cpu sysfs (specific to arm) */
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
+ sysfs, name);
+ if (stat(path, &st) == 0)
+ return 1;
+
+ return 0;
+}
+
+/*
* Return the CPU id as a raw string.
*
* Each architecture should provide a more precise id string that
* can be use to match the architecture's "mapfile".
*/
-char * __weak get_cpuid_str(void)
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
return NULL;
}
-static char *perf_pmu__getcpuid(void)
+static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
{
char *cpuid;
static bool printed;
@@ -556,7 +585,7 @@ static char *perf_pmu__getcpuid(void)
if (cpuid)
cpuid = strdup(cpuid);
if (!cpuid)
- cpuid = get_cpuid_str();
+ cpuid = get_cpuid_str(pmu);
if (!cpuid)
return NULL;
@@ -567,22 +596,45 @@ static char *perf_pmu__getcpuid(void)
return cpuid;
}
-struct pmu_events_map *perf_pmu__find_map(void)
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
{
struct pmu_events_map *map;
- char *cpuid = perf_pmu__getcpuid();
+ char *cpuid = perf_pmu__getcpuid(pmu);
int i;
+ /* on some platforms which uses cpus map, cpuid can be NULL for
+ * PMUs other than CORE PMUs.
+ */
+ if (!cpuid)
+ return NULL;
+
i = 0;
for (;;) {
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
map = &pmu_events_map[i++];
if (!map->table) {
map = NULL;
break;
}
- if (!strcmp(map->cpuid, cpuid))
+ if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) {
+ /* Warn unable to generate match particular string. */
+ pr_info("Invalid regular expression %s\n", map->cpuid);
break;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ break;
+ }
}
free(cpuid);
return map;
@@ -593,13 +645,14 @@ struct pmu_events_map *perf_pmu__find_map(void)
* to the current running CPU. Then, add all PMU events from that table
* as aliases.
*/
-static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
struct pmu_event *pe;
+ const char *name = pmu->name;
- map = perf_pmu__find_map();
+ map = perf_pmu__find_map(pmu);
if (!map)
return;
@@ -608,7 +661,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
*/
i = 0;
while (1) {
- const char *pname;
pe = &map->table[i++];
if (!pe->name) {
@@ -617,9 +669,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
break;
}
- pname = pe->pmu ? pe->pmu : "cpu";
- if (strncmp(pname, name, strlen(pname)))
- continue;
+ if (!is_pmu_core(name)) {
+ /* check for uncore devices */
+ if (pe->pmu == NULL)
+ continue;
+ if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+ continue;
+ }
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
@@ -661,21 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, &aliases))
return NULL;
- pmu_add_cpu_aliases(&aliases, name);
pmu = zalloc(sizeof(*pmu));
if (!pmu)
return NULL;
pmu->cpus = pmu_cpumask(name);
-
+ pmu->name = strdup(name);
+ pmu->type = type;
pmu->is_uncore = pmu_is_uncore(name);
+ pmu_add_cpu_aliases(&aliases, pmu);
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
- pmu->name = strdup(name);
- pmu->type = type;
list_add_tail(&pmu->list, &pmus);
pmu->default_config = perf_pmu__get_default_config(pmu);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 27c75e635866..76fecec7b3f9 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -92,6 +92,6 @@ int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
-struct pmu_events_map *perf_pmu__find_map(void);
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
#endif /* __PMU_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 8e49d9cafcfc..b1e999bd21ef 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
&pages, &overwrite))
return NULL;
- if (perf_evlist__mmap(evlist, pages, overwrite) < 0) {
+ if (perf_evlist__mmap(evlist, pages) < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index 0dfe27d99458..0efc3258c648 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist)
return;
}
+void rblist__exit(struct rblist *rblist)
+{
+ struct rb_node *pos, *next = rb_first(&rblist->entries);
+
+ while (next) {
+ pos = next;
+ next = rb_next(pos);
+ rblist__remove_node(rblist, pos);
+ }
+}
+
void rblist__delete(struct rblist *rblist)
{
if (rblist != NULL) {
- struct rb_node *pos, *next = rb_first(&rblist->entries);
-
- while (next) {
- pos = next;
- next = rb_next(pos);
- rblist__remove_node(rblist, pos);
- }
+ rblist__exit(rblist);
free(rblist);
}
}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
index 4c8638a22571..76df15c27f5f 100644
--- a/tools/perf/util/rblist.h
+++ b/tools/perf/util/rblist.h
@@ -29,6 +29,7 @@ struct rblist {
};
void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
void rblist__delete(struct rblist *rblist);
int rblist__add_node(struct rblist *rblist, const void *new_entry);
void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index df2857137908..54e30f1bcbd7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1348,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
{
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
+ struct perf_sample sample = { .time = 0, };
int fd = perf_data__fd(session->data);
int err;
- dump_event(session->evlist, event, file_offset, NULL);
+ dump_event(session->evlist, event, file_offset, &sample);
/* These events are processed right away */
switch (event->header.type) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 855e35cbb1dc..57ec22513971 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -87,6 +87,16 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
return &nd->rb_node;
}
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct saved_value *v;
+
+ BUG_ON(!rb_node);
+ v = container_of(rb_node, struct saved_value, rb_node);
+ free(v);
+}
+
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
int cpu,
bool create)
@@ -114,7 +124,7 @@ void perf_stat__init_shadow_stats(void)
rblist__init(&runtime_saved_values);
runtime_saved_values.node_cmp = saved_value_cmp;
runtime_saved_values.node_new = saved_value_new;
- /* No delete for now */
+ runtime_saved_values.node_delete = saved_value_delete;
}
static int evsel_context(struct perf_evsel *evsel)
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index be0d5a736dea..2b653853eec2 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
return threads;
}
-struct thread_map *thread_map__new_by_uid(uid_t uid)
+static struct thread_map *__thread_map__new_all_cpus(uid_t uid)
{
DIR *proc;
int max_threads = 32, items, i;
@@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
while ((dirent = readdir(proc)) != NULL) {
char *end;
bool grow = false;
- struct stat st;
pid_t pid = strtol(dirent->d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
@@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
- if (stat(path, &st) != 0)
- continue;
+ if (uid != UINT_MAX) {
+ struct stat st;
- if (st.st_uid != uid)
- continue;
+ if (stat(path, &st) != 0 || st.st_uid != uid)
+ continue;
+ }
snprintf(path, sizeof(path), "/proc/%d/task", pid);
items = scandir(path, &namelist, filter, NULL);
@@ -178,6 +178,16 @@ out_free_closedir:
goto out_closedir;
}
+struct thread_map *thread_map__new_all_cpus(void)
+{
+ return __thread_map__new_all_cpus(UINT_MAX);
+}
+
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ return __thread_map__new_all_cpus(uid);
+}
+
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
{
if (pid != -1)
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index f15803985435..07a765fb22bb 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void);
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new_all_cpus(void);
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
struct thread_map *thread_map__new_event(struct thread_map_event *event);