summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-09-22 18:05:48 +0200
committerIngo Molnar <mingo@kernel.org>2017-09-22 18:05:48 +0200
commitaa469aafddca64d6570d2257b3443359af55ab88 (patch)
tree5669c076721fddde0fe91747a5d1524d29d8d2aa /tools
parentMerge tag 'trace-v4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parentperf tools: Provide mutex wrappers for pthreads rwlocks (diff)
downloadlinux-aa469aafddca64d6570d2257b3443359af55ab88.tar.xz
linux-aa469aafddca64d6570d2257b3443359af55ab88.zip
Merge tag 'perf-core-for-mingo-4.15-20170922' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Support direct --user-regs arguments in 'perf record', previously the only way to sample PERF_SAMPLE_REGS_USER was implicitly selecting it when recording callchains (Andi Kleen) - Support showing sampled user regs in 'perf script' (Andi Kleen) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. E.g: (Andi Kleen) % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}:W' -a sleep 1 125,366,055 branches (80.02%) 9,208,402 branch-misses # 7.35% of all branches (80.01%) 24,560,249 l1d.replacement (80.00%) 43,174,971 l2_lines_in.all (80.05%) 31,891,457 l2_rqsts.all_code_rd (79.92%) - Support metrics in 'stat' and 'list'. A metric is a formula that uses multiple events to compute a higher level result (e.g. IPC). (Andi Kleen) - Add Intel processors vendor event metrics JSON files (Andi Kleen) - Add 'pid' and 'tid' options to 'perf sched timehist' (David Ahern) - Generate 'behavior' string table from kernel headers, helps getting new parameters when synchronizing kernel headers, like MADV_WIPEONFORK and MADV_KEEPONFORK, that are now beautied (Arnaldo Carvalho de Melo) - Improve TUI progress bar by showing how many bytes from a total were processed (Jiri Olsa) - Use scandir() to replace readdir(), prep work to have the synthesizing of PERF_RECORD_ entries for existing threads be multithreaded, making 'perf top' bearable on high core count systems such as Intel's Knights Landing/Mill (Kan Liang) - Allow creating a ~/.perfconfig file when setting a variable to its default value, previously it would bail out and not write such a file (Taeung Song) - Introduce wrapper for allowing purely single threaded apps to avoid the costs of locking (Arnaldo Carvalho de Melo) - Introduce hashtable to reduce the cost of thread lookup - Fix build C++ build wrt poison.h using void pointer arithmetic, affects only the embedded clang/llvm case, that is disabled by default (Arnaldo Carvalho de Melo) - Fix leaking rec_argv in error cases (Martin Kepplinger) - Remove Intel CQM perf test, that infrastructure was nuked (Xiaochen Shen) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/include/linux/poison.h5
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h14
-rw-r--r--tools/perf/Documentation/perf-list.txt9
-rw-r--r--tools/perf/Documentation/perf-record.txt2
-rw-r--r--tools/perf/Documentation/perf-sched.txt8
-rw-r--r--tools/perf/Documentation/perf-script.txt4
-rw-r--r--tools/perf/Documentation/perf-stat.txt7
-rw-r--r--tools/perf/MANIFEST87
-rw-r--r--tools/perf/Makefile.perf17
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c127
-rw-r--r--tools/perf/builtin-c2c.c1
-rw-r--r--tools/perf/builtin-config.c22
-rw-r--r--tools/perf/builtin-kvm.c1
-rw-r--r--tools/perf/builtin-list.c7
-rw-r--r--tools/perf/builtin-mem.c1
-rw-r--r--tools/perf/builtin-record.c3
-rw-r--r--tools/perf/builtin-sched.c4
-rw-r--r--tools/perf/builtin-script.c32
-rw-r--r--tools/perf/builtin-stat.c82
-rw-r--r--tools/perf/builtin-timechart.c4
-rw-r--r--tools/perf/builtin-trace.c20
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json182
-rw-r--r--tools/perf/pmu-events/jevents.c24
-rw-r--r--tools/perf/pmu-events/jevents.h2
-rw-r--r--tools/perf/pmu-events/pmu-events.h1
-rw-r--r--tools/perf/tests/builtin-test.c1
-rwxr-xr-xtools/perf/trace/beauty/madvise_behavior.sh10
-rw-r--r--tools/perf/trace/beauty/mmap.c38
-rw-r--r--tools/perf/ui/progress.c6
-rw-r--r--tools/perf/ui/progress.h12
-rw-r--r--tools/perf/ui/tui/progress.c32
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/data.c1
-rw-r--r--tools/perf/util/dso.c13
-rw-r--r--tools/perf/util/dso.h4
-rw-r--r--tools/perf/util/event.c46
-rw-r--r--tools/perf/util/evlist.h1
-rw-r--r--tools/perf/util/evsel.c7
-rw-r--r--tools/perf/util/evsel.h1
-rw-r--r--tools/perf/util/machine.c155
-rw-r--r--tools/perf/util/machine.h24
-rw-r--r--tools/perf/util/map.c34
-rw-r--r--tools/perf/util/map.h3
-rw-r--r--tools/perf/util/metricgroup.c490
-rw-r--r--tools/perf/util/metricgroup.h31
-rw-r--r--tools/perf/util/namespaces.c1
-rw-r--r--tools/perf/util/parse-events.c29
-rw-r--r--tools/perf/util/parse-events.h3
-rw-r--r--tools/perf/util/parse-events.l3
-rw-r--r--tools/perf/util/pmu.c55
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/probe-file.c1
-rw-r--r--tools/perf/util/rb_resort.h5
-rw-r--r--tools/perf/util/rwsem.c32
-rw-r--r--tools/perf/util/rwsem.h19
-rw-r--r--tools/perf/util/session.c2
-rw-r--r--tools/perf/util/stat-shadow.c110
-rw-r--r--tools/perf/util/stat.h4
-rw-r--r--tools/perf/util/symbol.c8
-rw-r--r--tools/perf/util/thread.c4
-rw-r--r--tools/perf/util/trace-event-info.c1
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/perf/util/util.c16
-rw-r--r--tools/perf/util/util.h7
-rw-r--r--tools/perf/util/vdso.c4
-rw-r--r--tools/perf/util/zlib.c1
81 files changed, 2988 insertions, 489 deletions
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 51334edec506..f306a7642509 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -14,6 +14,10 @@
# define POISON_POINTER_DELTA 0
#endif
+#ifdef __cplusplus
+#define LIST_POISON1 NULL
+#define LIST_POISON2 NULL
+#else
/*
* These are non-NULL pointers that will result in page faults
* under normal circumstances, used to verify that nobody uses
@@ -21,6 +25,7 @@
*/
#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
+#endif
/********** include/linux/timer.h **********/
/*
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 8c27db0c5c08..203268f9231e 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -58,20 +58,12 @@
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
/* compatibility flags */
#define MAP_FILE 0
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index f709de54707b..24679aed90b7 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,8 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
+'perf list' [--no-desc] [--long-desc]
+ [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
DESCRIPTION
-----------
@@ -47,6 +48,8 @@ counted. The following modifiers exist:
P - use maximum detected precise level
S - read sample value (PERF_SAMPLE_READ)
D - pin the event to the PMU
+ W - group is weak and will fallback to non-group if not schedulable,
+ only supported in 'perf stat' for now.
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -246,6 +249,10 @@ To limit the list use:
. 'sdt' to list all Statically Defined Tracepoint events.
+. 'metric' to list metrics
+
+. 'metricgroup' to list metricgroups with metrics.
+
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e397453e5a46..68a1ffb0a8a5 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -377,6 +377,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use
--intr-regs=\?. To name registers, pass a comma separated list such as
--intr-regs=ax,bx. The list of register is architecture dependent.
+--user-regs::
+Capture user registers at sample time. Same arguments as -I.
--running-time::
Record running and enabled time for read events (:S)
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index a092a2499e8f..55b67338548e 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -106,6 +106,14 @@ OPTIONS for 'perf sched timehist'
--max-stack::
Maximum number of functions to display in backtrace, default 5.
+-p=::
+--pid=::
+ Only show events for given process ID (comma separated list).
+
+-t=::
+--tid=::
+ Only show events for given thread ID (comma separated list).
+
-s::
--summary::
Show only a summary of scheduling by thread with min, max, and average
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 18dfcfa38454..bcc1ba35a2d8 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@ OPTIONS
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
- callindent, insn, insnlen, synth, phys_addr.
+ srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
+ brstackoff, callindent, insn, insnlen, synth, phys_addr.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c37d61682dfb..823fce7674bb 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -199,6 +199,13 @@ Aggregate counts per processor socket for system-wide mode measurements.
--per-core::
Aggregate counts per physical processor for system-wide mode measurements.
+-M::
+--metrics::
+Print metrics or metricgroups specified in a comma separated list.
+For a group all metrics from the group are added.
+The events from the metrics are automatically measured.
+See perf list output for the possble metrics and metricgroups.
+
-A::
--no-aggr::
Do not aggregate counts across all monitored CPUs.
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 62072822dc85..627b7cada144 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,34 +1,8 @@
tools/perf
-tools/arch/alpha/include/asm/barrier.h
-tools/arch/arm/include/asm/barrier.h
-tools/arch/arm64/include/asm/barrier.h
-tools/arch/ia64/include/asm/barrier.h
-tools/arch/mips/include/asm/barrier.h
-tools/arch/powerpc/include/asm/barrier.h
-tools/arch/s390/include/asm/barrier.h
-tools/arch/sh/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier_32.h
-tools/arch/sparc/include/asm/barrier_64.h
-tools/arch/tile/include/asm/barrier.h
-tools/arch/x86/include/asm/barrier.h
-tools/arch/x86/include/asm/cmpxchg.h
-tools/arch/x86/include/asm/cpufeatures.h
-tools/arch/x86/include/asm/disabled-features.h
-tools/arch/x86/include/asm/required-features.h
-tools/arch/x86/include/uapi/asm/svm.h
-tools/arch/x86/include/uapi/asm/vmx.h
-tools/arch/x86/include/uapi/asm/kvm.h
-tools/arch/x86/include/uapi/asm/kvm_perf.h
-tools/arch/x86/lib/memcpy_64.S
-tools/arch/x86/lib/memset_64.S
-tools/arch/s390/include/uapi/asm/kvm_perf.h
-tools/arch/s390/include/uapi/asm/sie.h
-tools/arch/xtensa/include/asm/barrier.h
+tools/arch
tools/scripts
tools/build
-tools/arch/x86/include/asm/atomic.h
-tools/arch/x86/include/asm/rmwcc.h
+tools/include
tools/lib/traceevent
tools/lib/api
tools/lib/bpf
@@ -42,60 +16,3 @@ tools/lib/find_bit.c
tools/lib/bitmap.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c
-tools/include/asm/alternative-asm.h
-tools/include/asm/atomic.h
-tools/include/asm/barrier.h
-tools/include/asm/bug.h
-tools/include/asm-generic/atomic-gcc.h
-tools/include/asm-generic/barrier.h
-tools/include/asm-generic/bitops/arch_hweight.h
-tools/include/asm-generic/bitops/atomic.h
-tools/include/asm-generic/bitops/const_hweight.h
-tools/include/asm-generic/bitops/__ffs.h
-tools/include/asm-generic/bitops/__ffz.h
-tools/include/asm-generic/bitops/__fls.h
-tools/include/asm-generic/bitops/find.h
-tools/include/asm-generic/bitops/fls64.h
-tools/include/asm-generic/bitops/fls.h
-tools/include/asm-generic/bitops/hweight.h
-tools/include/asm-generic/bitops.h
-tools/include/linux/atomic.h
-tools/include/linux/bitops.h
-tools/include/linux/compiler.h
-tools/include/linux/compiler-gcc.h
-tools/include/linux/coresight-pmu.h
-tools/include/linux/bug.h
-tools/include/linux/filter.h
-tools/include/linux/hash.h
-tools/include/linux/kernel.h
-tools/include/linux/list.h
-tools/include/linux/log2.h
-tools/include/uapi/asm-generic/fcntl.h
-tools/include/uapi/asm-generic/ioctls.h
-tools/include/uapi/asm-generic/mman-common.h
-tools/include/uapi/asm-generic/mman.h
-tools/include/uapi/drm/drm.h
-tools/include/uapi/drm/i915_drm.h
-tools/include/uapi/linux/bpf.h
-tools/include/uapi/linux/bpf_common.h
-tools/include/uapi/linux/fcntl.h
-tools/include/uapi/linux/hw_breakpoint.h
-tools/include/uapi/linux/kvm.h
-tools/include/uapi/linux/mman.h
-tools/include/uapi/linux/perf_event.h
-tools/include/uapi/linux/sched.h
-tools/include/uapi/linux/stat.h
-tools/include/uapi/linux/vhost.h
-tools/include/uapi/sound/asound.h
-tools/include/linux/poison.h
-tools/include/linux/rbtree.h
-tools/include/linux/rbtree_augmented.h
-tools/include/linux/refcount.h
-tools/include/linux/string.h
-tools/include/linux/stringify.h
-tools/include/linux/types.h
-tools/include/linux/err.h
-tools/include/linux/bitmap.h
-tools/include/linux/time64.h
-tools/arch/*/include/uapi/asm/mman.h
-tools/arch/*/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 91ef44bfaf3e..5f7408118a2d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -173,7 +173,7 @@ AWK = awk
# non-config cases
config := 1
-NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -441,6 +441,13 @@ perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
$(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
+madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
+madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
+madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
+
+$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
+ $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
+
all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
@@ -541,6 +548,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(sndrv_ctl_ioctl_array) \
$(kvm_ioctl_array) \
$(vhost_virtio_ioctl_array) \
+ $(madvise_behavior_array) \
$(perf_ioctl_array)
$(OUTPUT)%.o: %.c prepare FORCE
@@ -802,7 +810,10 @@ config-clean:
$(call QUIET_CLEAN, config)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean
+python-clean:
+ $(python-clean)
+
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
@@ -811,6 +822,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
+ $(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(drm_ioctl_array) \
$(OUTPUT)$(pkey_alloc_access_rights_array) \
$(OUTPUT)$(sndrv_ctl_ioctl_array) \
@@ -819,7 +831,6 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array)
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
- $(python-clean)
#
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 4e0b806a7a0f..01ad4208bcdf 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -8,7 +8,6 @@ struct test;
int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
-int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index cbb7e978166b..8e2c5a38c3b9 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -5,4 +5,3 @@ libperf-y += arch-tests.o
libperf-y += rdpmc.o
libperf-y += perf-time-to-tsc.o
libperf-$(CONFIG_AUXTRACE) += insn-x86.o
-libperf-y += intel-cqm.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 99d66191e56c..271c0a0f95d7 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -24,10 +24,6 @@ struct test arch_tests[] = {
},
#endif
{
- .desc = "Intel cqm nmi context read",
- .func = test__intel_cqm_count_nmi_context,
- },
- {
.func = NULL,
},
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
deleted file mode 100644
index 57f86b6e7d6f..000000000000
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ /dev/null
@@ -1,127 +0,0 @@
-#include "tests/tests.h"
-#include "perf.h"
-#include "cloexec.h"
-#include "debug.h"
-#include "evlist.h"
-#include "evsel.h"
-#include "arch-tests.h"
-
-#include <signal.h>
-#include <sys/mman.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <string.h>
-
-static pid_t spawn(void)
-{
- pid_t pid;
-
- pid = fork();
- if (pid)
- return pid;
-
- while(1)
- sleep(5);
- return 0;
-}
-
-/*
- * Create an event group that contains both a sampled hardware
- * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then
- * wait for the hardware perf counter to overflow and generate a PMI,
- * which triggers an event read for both of the events in the group.
- *
- * Since reading Intel CQM event counters requires sending SMP IPIs, the
- * CQM pmu needs to handle the above situation gracefully, and return
- * the last read counter value to avoid triggering a WARN_ON_ONCE() in
- * smp_call_function_many() caused by sending IPIs from NMI context.
- */
-int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
-{
- struct perf_evlist *evlist = NULL;
- struct perf_evsel *evsel = NULL;
- struct perf_event_attr pe;
- int i, fd[2], flag, ret;
- size_t mmap_len;
- void *event;
- pid_t pid;
- int err = TEST_FAIL;
-
- flag = perf_event_open_cloexec_flag();
-
- evlist = perf_evlist__new();
- if (!evlist) {
- pr_debug("perf_evlist__new failed\n");
- return TEST_FAIL;
- }
-
- ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
- if (ret) {
- pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
- err = TEST_SKIP;
- goto out;
- }
-
- evsel = perf_evlist__first(evlist);
- if (!evsel) {
- pr_debug("perf_evlist__first failed\n");
- goto out;
- }
-
- memset(&pe, 0, sizeof(pe));
- pe.size = sizeof(pe);
-
- pe.type = PERF_TYPE_HARDWARE;
- pe.config = PERF_COUNT_HW_CPU_CYCLES;
- pe.read_format = PERF_FORMAT_GROUP;
-
- pe.sample_period = 128;
- pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
-
- pid = spawn();
-
- fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag);
- if (fd[0] < 0) {
- pr_debug("failed to open event\n");
- goto out;
- }
-
- memset(&pe, 0, sizeof(pe));
- pe.size = sizeof(pe);
-
- pe.type = evsel->attr.type;
- pe.config = evsel->attr.config;
-
- fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag);
- if (fd[1] < 0) {
- pr_debug("failed to open event\n");
- goto out;
- }
-
- /*
- * Pick a power-of-two number of pages + 1 for the meta-data
- * page (struct perf_event_mmap_page). See tools/perf/design.txt.
- */
- mmap_len = page_size * 65;
-
- event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0);
- if (event == (void *)(-1)) {
- pr_debug("failed to mmap %d\n", errno);
- goto out;
- }
-
- sleep(1);
-
- err = TEST_OK;
-
- munmap(event, mmap_len);
-
- for (i = 0; i < 2; i++)
- close(fd[i]);
-
- kill(pid, SIGKILL);
- wait(NULL);
-out:
- perf_evlist__delete(evlist);
- return err;
-}
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 475999e48f66..bb1ee22bd221 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2732,6 +2732,7 @@ static int perf_c2c__record(int argc, const char **argv)
if (!perf_mem_events[j].supported) {
pr_err("failed: event '%s' not supported\n",
perf_mem_events[j].name);
+ free(rec_argv);
return -1;
}
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index a1d82e33282c..b89417d9305e 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -34,8 +34,7 @@ static struct option config_options[] = {
OPT_END()
};
-static int set_config(struct perf_config_set *set, const char *file_name,
- const char *var, const char *value)
+static int set_config(struct perf_config_set *set, const char *file_name)
{
struct perf_config_section *section = NULL;
struct perf_config_item *item = NULL;
@@ -49,7 +48,6 @@ static int set_config(struct perf_config_set *set, const char *file_name,
if (!fp)
return -1;
- perf_config_set__collect(set, file_name, var, value);
fprintf(fp, "%s\n", first_line);
/* overwrite configvariables */
@@ -161,6 +159,7 @@ int cmd_config(int argc, const char **argv)
struct perf_config_set *set;
char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
const char *config_filename;
+ bool changed = false;
argc = parse_options(argc, argv, config_options, config_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -231,15 +230,26 @@ int cmd_config(int argc, const char **argv)
goto out_err;
}
} else {
- if (set_config(set, config_filename, var, value) < 0) {
- pr_err("Failed to set '%s=%s' on %s\n",
- var, value, config_filename);
+ if (perf_config_set__collect(set, config_filename,
+ var, value) < 0) {
+ pr_err("Failed to add '%s=%s'\n",
+ var, value);
free(arg);
goto out_err;
}
+ changed = true;
}
free(arg);
}
+
+ if (!changed)
+ break;
+
+ if (set_config(set, config_filename) < 0) {
+ pr_err("Failed to set the configs on %s\n",
+ config_filename);
+ goto out_err;
+ }
}
ret = 0;
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index f309c3773522..c747a1af49fe 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -34,7 +34,6 @@
#include <termios.h>
#include <semaphore.h>
#include <signal.h>
-#include <pthread.h>
#include <math.h>
static const char *get_filename_for_perf_kvm(void)
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 4bf2cb4d25aa..b2d2ad3dd478 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -15,6 +15,7 @@
#include "util/cache.h"
#include "util/pmu.h"
#include "util/debug.h"
+#include "util/metricgroup.h"
#include <subcmd/parse-options.h>
static bool desc_flag = true;
@@ -79,6 +80,10 @@ int cmd_list(int argc, const char **argv)
long_desc_flag, details_flag);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
+ else if (strcmp(argv[i], "metric") == 0)
+ metricgroup__print(true, false, NULL, raw_dump);
+ else if (strcmp(argv[i], "metricgroup") == 0)
+ metricgroup__print(false, true, NULL, raw_dump);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
@@ -96,6 +101,7 @@ int cmd_list(int argc, const char **argv)
s[sep_idx] = '\0';
print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
print_sdt_events(s, s + sep_idx + 1, raw_dump);
+ metricgroup__print(true, true, s, raw_dump);
free(s);
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
@@ -112,6 +118,7 @@ int cmd_list(int argc, const char **argv)
details_flag);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
+ metricgroup__print(true, true, NULL, raw_dump);
free(s);
}
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 0f15634ef82c..6940490bc3f9 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -112,6 +112,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (!perf_mem_events[j].supported) {
pr_err("failed: event '%s' not supported\n",
perf_mem_events__name(j));
+ free(rec_argv);
return -1;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 56f8142ff97f..9b379f3a3d99 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1643,6 +1643,9 @@ static struct option __record_options[] = {
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
"sample selected machine registers on interrupt,"
" use -I ? to list register names", parse_regs),
+ OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
+ "sample selected machine registers on interrupt,"
+ " use -I ? to list register names", parse_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 322b4def8411..b7e8812ee80c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -3363,6 +3363,10 @@ int cmd_sched(int argc, const char **argv)
OPT_STRING(0, "time", &sched.time_str, "str",
"Time span for analysis (start,stop)"),
OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
+ OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+ "analyze events only for given process id(s)"),
+ OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+ "analyze events only for given thread id(s)"),
OPT_PARENT(sched_options)
};
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3d4c3b5e1868..9092de0f7238 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -88,6 +88,7 @@ enum perf_output_field {
PERF_OUTPUT_BRSTACKOFF = 1U << 24,
PERF_OUTPUT_SYNTH = 1U << 25,
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
+ PERF_OUTPUT_UREGS = 1U << 27,
};
struct output_option {
@@ -109,6 +110,7 @@ struct output_option {
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
+ {.str = "uregs", .field = PERF_OUTPUT_UREGS},
{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
@@ -385,6 +387,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_IREGS))
return -EINVAL;
+ if (PRINT_FIELD(UREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
+ PERF_OUTPUT_UREGS))
+ return -EINVAL;
+
if (PRINT_FIELD(PHYS_ADDR) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
PERF_OUTPUT_PHYS_ADDR))
@@ -509,6 +516,24 @@ static void print_sample_iregs(struct perf_sample *sample,
}
}
+static void print_sample_uregs(struct perf_sample *sample,
+ struct perf_event_attr *attr)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ uint64_t mask = attr->sample_regs_user;
+ unsigned i = 0, r;
+
+ if (!regs || !regs->regs)
+ return;
+
+ printf(" ABI:%" PRIu64 " ", regs->abi);
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+ printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ }
+}
+
static void print_sample_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel)
@@ -1444,6 +1469,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(IREGS))
print_sample_iregs(sample, attr);
+ if (PRINT_FIELD(UREGS))
+ print_sample_uregs(sample, attr);
+
if (PRINT_FIELD(BRSTACK))
print_sample_brstack(sample, thread, attr);
else if (PRINT_FIELD(BRSTACKSYM))
@@ -2739,7 +2767,7 @@ int cmd_script(int argc, const char **argv)
"+field to add and -field to remove."
"Valid types: hw,sw,trace,raw,synth. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,brstack,brstacksym,flags,"
+ "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
"bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
@@ -2801,6 +2829,8 @@ int cmd_script(int argc, const char **argv)
NULL
};
+ perf_set_singlethreaded();
+
setup_scripting();
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 69523ed55894..dd525417880a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
#include "util/tool.h"
#include "util/group.h"
#include "util/string2.h"
+#include "util/metricgroup.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -133,6 +134,8 @@ static const char *smi_cost_attrs = {
static struct perf_evlist *evsel_list;
+static struct rblist metric_events;
+
static struct target target = {
.uid = UINT_MAX,
};
@@ -192,6 +195,11 @@ static struct perf_stat_config stat_config = {
.scale = true,
};
+static bool is_duration_time(struct perf_evsel *evsel)
+{
+ return !strcmp(evsel->name, "duration_time");
+}
+
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
@@ -407,6 +415,8 @@ static void process_interval(void)
pr_err("failed to write stat round event\n");
}
+ init_stats(&walltime_nsecs_stats);
+ update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
print_counters(&rs, 0, NULL);
}
@@ -582,6 +592,32 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
}
+static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
+{
+ struct perf_evsel *c2, *leader;
+ bool is_open = true;
+
+ leader = evsel->leader;
+ pr_debug("Weak group for %s/%d failed\n",
+ leader->name, leader->nr_members);
+
+ /*
+ * for_each_group_member doesn't work here because it doesn't
+ * include the first entry.
+ */
+ evlist__for_each_entry(evsel_list, c2) {
+ if (c2 == evsel)
+ is_open = false;
+ if (c2->leader == leader) {
+ if (is_open)
+ perf_evsel__close(c2);
+ c2->leader = c2;
+ c2->nr_members = 0;
+ }
+ }
+ return leader;
+}
+
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
@@ -618,6 +654,15 @@ static int __run_perf_stat(int argc, const char **argv)
evlist__for_each_entry(evsel_list, counter) {
try_again:
if (create_perf_stat_counter(counter) < 0) {
+
+ /* Weak group failed. Reset the group. */
+ if ((errno == EINVAL || errno == EBADF) &&
+ counter->leader != counter &&
+ counter->weak_group) {
+ counter = perf_evsel__reset_weak_group(counter);
+ goto try_again;
+ }
+
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
@@ -1199,7 +1244,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
perf_stat__print_shadow_stats(counter, uval,
first_shadow_cpu(counter, id),
- &out);
+ &out, &metric_events);
if (!csv_output && !metric_only) {
print_noise(counter, noise);
print_running(run, ena);
@@ -1325,6 +1370,9 @@ static void print_aggr(char *prefix)
ad.id = id = aggr_map->map[s];
first = true;
evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+
ad.val = ad.ena = ad.run = 0;
ad.nr = 0;
if (!collect_data(counter, aggr_cb, &ad))
@@ -1468,6 +1516,8 @@ static void print_no_aggr_metric(char *prefix)
if (prefix)
fputs(prefix, stat_config.output);
evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
if (first) {
aggr_printout(counter, cpu, 0);
first = false;
@@ -1522,6 +1572,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
/* Print metrics headers only */
evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
os.evsel = counter;
out.ctx = &os;
out.print_metric = print_metric_header;
@@ -1530,7 +1582,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
os.evsel = counter;
perf_stat__print_shadow_stats(counter, 0,
0,
- &out);
+ &out,
+ &metric_events);
}
fputc('\n', stat_config.output);
}
@@ -1668,12 +1721,18 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
print_aggr(prefix);
break;
case AGGR_THREAD:
- evlist__for_each_entry(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
print_aggr_thread(counter, prefix);
+ }
break;
case AGGR_GLOBAL:
- evlist__for_each_entry(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
print_counter_aggr(counter, prefix);
+ }
if (metric_only)
fputc('\n', stat_config.output);
break;
@@ -1681,8 +1740,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
if (metric_only)
print_no_aggr_metric(prefix);
else {
- evlist__for_each_entry(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
print_counter(counter, prefix);
+ }
}
break;
case AGGR_UNSET:
@@ -1754,6 +1816,13 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
return 0;
}
+static int parse_metric_groups(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ return metricgroup__parse_groups(opt, str, &metric_events);
+}
+
static const struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1819,6 +1888,9 @@ static const struct option stat_options[] = {
"measure topdown level 1 statistics"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
"measure SMI cost"),
+ OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
+ "monitor specified metrics or metric groups (separated by ,)",
+ parse_metric_groups),
OPT_END()
};
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 4e2e61695986..01de01ca14f2 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1732,8 +1732,10 @@ static int timechart__io_record(int argc, const char **argv)
if (rec_argv == NULL)
return -ENOMEM;
- if (asprintf(&filter, "common_pid != %d", getpid()) < 0)
+ if (asprintf(&filter, "common_pid != %d", getpid()) < 0) {
+ free(rec_argv);
return -ENOMEM;
+ }
p = rec_argv;
for (i = 0; i < common_args_nr; i++)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 771ddab94bb0..967bd351b58d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2078,6 +2078,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
else {
pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+ free(rec_argv);
return -1;
}
}
@@ -2730,20 +2731,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
size_t printed = trace__fprintf_threads_header(fp);
struct rb_node *nd;
+ int i;
- if (threads == NULL) {
- fprintf(fp, "%s", "Error sorting output by nr_events!\n");
- return 0;
- }
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
- resort_rb__for_each_entry(nd, threads)
- printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+ if (threads == NULL) {
+ fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+ return 0;
+ }
- resort_rb__delete(threads);
+ resort_rb__for_each_entry(nd, threads)
+ printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+ resort_rb__delete(threads);
+ }
return printed;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index dc442ba21bf6..fbb0a9cd0ac6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -65,6 +65,7 @@ struct record_opts {
unsigned int user_freq;
u64 branch_stack;
u64 sample_intr_regs;
+ u64 sample_user_regs;
u64 default_interval;
u64 user_interval;
size_t auxtrace_snapshot_size;
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
new file mode 100644
index 000000000000..49c5f123d811
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
new file mode 100644
index 000000000000..49c5f123d811
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
new file mode 100644
index 000000000000..4248b029d199
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
new file mode 100644
index 000000000000..03d894988f0f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -0,0 +1,158 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
new file mode 100644
index 000000000000..0bae5eda9fb3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -0,0 +1,158 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
new file mode 100644
index 000000000000..057a832f1a6a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
new file mode 100644
index 000000000000..057a832f1a6a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
new file mode 100644
index 000000000000..b35b1c153c8a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -0,0 +1,140 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
new file mode 100644
index 000000000000..b35b1c153c8a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -0,0 +1,140 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
new file mode 100644
index 000000000000..411f9411ec9e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
new file mode 100644
index 000000000000..68f12a26c816
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -0,0 +1,182 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-core)",
+ "MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "L1 cache miss per kilo instruction for demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses;",
+ "MetricName": "L1MPKI"
+ },
+ {
+ "BriefDescription": "L2 cache miss per kilo instruction for demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses;",
+ "MetricName": "L2MPKI"
+ },
+ {
+ "BriefDescription": "L3 cache miss per kilo instruction for demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses;",
+ "MetricName": "L3MPKI"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index d51dc9ca8861..9eb7047bafe4 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -292,7 +292,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
char *desc, char *long_desc,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
- char *metric_name)
+ char *metric_name, char *metric_group)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -304,8 +304,10 @@ static int print_events_table_entry(void *data, char *name, char *event,
*/
fprintf(outfp, "{\n");
- fprintf(outfp, "\t.name = \"%s\",\n", name);
- fprintf(outfp, "\t.event = \"%s\",\n", event);
+ if (name)
+ fprintf(outfp, "\t.name = \"%s\",\n", name);
+ if (event)
+ fprintf(outfp, "\t.event = \"%s\",\n", event);
fprintf(outfp, "\t.desc = \"%s\",\n", desc);
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
if (long_desc && long_desc[0])
@@ -320,6 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
if (metric_name)
fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
+ if (metric_group)
+ fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
fprintf(outfp, "},\n");
return 0;
@@ -357,6 +361,9 @@ static char *real_event(const char *name, char *event)
{
int i;
+ if (!name)
+ return NULL;
+
for (i = 0; fixed[i].name; i++)
if (!strcasecmp(name, fixed[i].name))
return (char *)fixed[i].event;
@@ -369,7 +376,7 @@ int json_events(const char *fn,
char *long_desc,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
- char *metric_name),
+ char *metric_name, char *metric_group),
void *data)
{
int err = -EIO;
@@ -397,6 +404,7 @@ int json_events(const char *fn,
char *unit = NULL;
char *metric_expr = NULL;
char *metric_name = NULL;
+ char *metric_group = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
@@ -476,6 +484,8 @@ int json_events(const char *fn,
addfield(map, &perpkg, "", "", val);
} else if (json_streq(map, field, "MetricName")) {
addfield(map, &metric_name, "", "", val);
+ } else if (json_streq(map, field, "MetricGroup")) {
+ addfield(map, &metric_group, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &metric_expr, "", "", val);
for (s = metric_expr; *s; s++)
@@ -501,10 +511,11 @@ int json_events(const char *fn,
addfield(map, &event, ",", filter, NULL);
if (msr != NULL)
addfield(map, &event, ",", msr->pname, msrval);
- fixname(name);
+ if (name)
+ fixname(name);
err = func(data, name, real_event(name, event), desc, long_desc,
- pmu, unit, perpkg, metric_expr, metric_name);
+ pmu, unit, perpkg, metric_expr, metric_name, metric_group);
free(event);
free(desc);
free(name);
@@ -516,6 +527,7 @@ int json_events(const char *fn,
free(unit);
free(metric_expr);
free(metric_name);
+ free(metric_group);
if (err)
break;
tok += j;
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index 611fac01913d..557994754410 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -6,7 +6,7 @@ int json_events(const char *fn,
char *long_desc,
char *pmu,
char *unit, char *perpkg, char *metric_expr,
- char *metric_name),
+ char *metric_name, char *metric_group),
void *data);
char *get_cpu_str(void);
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 569eab3688dd..94fa1720f6fd 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -15,6 +15,7 @@ struct pmu_event {
const char *perpkg;
const char *metric_expr;
const char *metric_name;
+ const char *metric_group;
};
/*
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 377bea009163..d0fee35db0e7 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -3,6 +3,7 @@
*
* Builtin regression testing command: ever growing number of sanity tests
*/
+#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
new file mode 100755
index 000000000000..60ef8640ee70
--- /dev/null
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+header_dir=$1
+
+printf "static const char *madvise_advices[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/mman-common.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 754558f9009d..45b1e0c0018f 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -94,35 +94,21 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size)
+{
+#include "trace/beauty/generated/madvise_behavior_array.c"
+ static DEFINE_STRARRAY(madvise_advices);
+
+ if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL)
+ return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]);
+
+ return scnprintf(bf, size, "%#", behavior);
+}
+
static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
struct syscall_arg *arg)
{
- int behavior = arg->val;
-
- switch (behavior) {
-#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
- P_MADV_BHV(NORMAL);
- P_MADV_BHV(RANDOM);
- P_MADV_BHV(SEQUENTIAL);
- P_MADV_BHV(WILLNEED);
- P_MADV_BHV(DONTNEED);
- P_MADV_BHV(FREE);
- P_MADV_BHV(REMOVE);
- P_MADV_BHV(DONTFORK);
- P_MADV_BHV(DOFORK);
- P_MADV_BHV(HWPOISON);
- P_MADV_BHV(SOFT_OFFLINE);
- P_MADV_BHV(MERGEABLE);
- P_MADV_BHV(UNMERGEABLE);
- P_MADV_BHV(HUGEPAGE);
- P_MADV_BHV(NOHUGEPAGE);
- P_MADV_BHV(DONTDUMP);
- P_MADV_BHV(DODUMP);
-#undef P_MADV_BHV
- default: break;
- }
-
- return scnprintf(bf, size, "%#x", behavior);
+ return madvise__scnprintf_behavior(arg->val, bf, size);
}
#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index ae91c8148edf..7ade387d511c 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -27,13 +27,17 @@ void ui_progress__update(struct ui_progress *p, u64 adv)
}
}
-void ui_progress__init(struct ui_progress *p, u64 total, const char *title)
+void __ui_progress__init(struct ui_progress *p, u64 total,
+ const char *title, bool size)
{
p->curr = 0;
p->next = p->step = total / 16 ?: 1;
p->total = total;
p->title = title;
+ p->size = size;
+ if (ui_progress__ops->init)
+ ui_progress__ops->init(p);
}
void ui_progress__finish(void)
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
index 717d39d3052b..fbaa1507ebfe 100644
--- a/tools/perf/ui/progress.h
+++ b/tools/perf/ui/progress.h
@@ -8,12 +8,22 @@ void ui_progress__finish(void);
struct ui_progress {
const char *title;
u64 curr, next, step, total;
+ bool size;
};
-void ui_progress__init(struct ui_progress *p, u64 total, const char *title);
+void __ui_progress__init(struct ui_progress *p, u64 total,
+ const char *title, bool size);
+
+#define ui_progress__init(p, total, title) \
+ __ui_progress__init(p, total, title, false)
+
+#define ui_progress__init_size(p, total, title) \
+ __ui_progress__init(p, total, title, true)
+
void ui_progress__update(struct ui_progress *p, u64 adv);
struct ui_progress_ops {
+ void (*init)(struct ui_progress *p);
void (*update)(struct ui_progress *p);
void (*finish)(void);
};
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index c4b99008e2c9..68f6144ea603 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -1,12 +1,33 @@
+#include <linux/kernel.h>
#include "../cache.h"
#include "../progress.h"
#include "../libslang.h"
#include "../ui.h"
#include "tui.h"
+#include "units.h"
#include "../browser.h"
+static void __tui_progress__init(struct ui_progress *p)
+{
+ p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1;
+}
+
+static int get_title(struct ui_progress *p, char *buf, size_t size)
+{
+ char buf_cur[20];
+ char buf_tot[20];
+ int ret;
+
+ ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr);
+ ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total);
+
+ return ret + scnprintf(buf, size, "%s [%s/%s]",
+ p->title, buf_cur, buf_tot);
+}
+
static void tui_progress__update(struct ui_progress *p)
{
+ char buf[100], *title = (char *) p->title;
int bar, y;
/*
* FIXME: We should have a per UI backend way of showing progress,
@@ -18,13 +39,18 @@ static void tui_progress__update(struct ui_progress *p)
if (p->total == 0)
return;
+ if (p->size) {
+ get_title(p, buf, sizeof(buf));
+ title = buf;
+ }
+
ui__refresh_dimensions(false);
pthread_mutex_lock(&ui__lock);
y = SLtt_Screen_Rows / 2 - 2;
SLsmg_set_color(0);
SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
SLsmg_gotorc(y++, 1);
- SLsmg_write_string((char *)p->title);
+ SLsmg_write_string(title);
SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' ');
SLsmg_set_color(HE_COLORSET_SELECTED);
bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;
@@ -49,8 +75,8 @@ static void tui_progress__finish(void)
pthread_mutex_unlock(&ui__lock);
}
-static struct ui_progress_ops tui_progress__ops =
-{
+static struct ui_progress_ops tui_progress__ops = {
+ .init = __tui_progress__init,
.update = tui_progress__update,
.finish = tui_progress__finish,
};
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 94518c1bf8b6..369c3163e68c 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -34,6 +34,7 @@ libperf-y += dso.o
libperf-y += symbol.o
libperf-y += symbol_fprintf.o
libperf-y += color.o
+libperf-y += metricgroup.o
libperf-y += header.o
libperf-y += callchain.o
libperf-y += values.o
@@ -78,6 +79,7 @@ libperf-y += data.o
libperf-y += tsc.o
libperf-y += cloexec.o
libperf-y += call-path.o
+libperf-y += rwsem.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index bc75596f9e79..d2b6983b1779 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -700,10 +700,7 @@ struct perf_config_set *perf_config_set__new(void)
if (set) {
INIT_LIST_HEAD(&set->sections);
- if (perf_config_set__init(set) < 0) {
- perf_config_set__delete(set);
- set = NULL;
- }
+ perf_config_set__init(set);
}
return set;
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 263f5a906ba5..1123b30e3033 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -3,6 +3,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
+#include <fcntl.h>
#include <unistd.h>
#include <string.h>
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index b9e087fb8247..339e52971380 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -6,6 +6,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
+#include <fcntl.h>
#include "compress.h"
#include "path.h"
#include "symbol.h"
@@ -1365,9 +1366,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso)
void dsos__add(struct dsos *dsos, struct dso *dso)
{
- pthread_rwlock_wrlock(&dsos->lock);
+ down_write(&dsos->lock);
__dsos__add(dsos, dso);
- pthread_rwlock_unlock(&dsos->lock);
+ up_write(&dsos->lock);
}
struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
@@ -1386,9 +1387,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
{
struct dso *dso;
- pthread_rwlock_rdlock(&dsos->lock);
+ down_read(&dsos->lock);
dso = __dsos__find(dsos, name, cmp_short);
- pthread_rwlock_unlock(&dsos->lock);
+ up_read(&dsos->lock);
return dso;
}
@@ -1415,9 +1416,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
struct dso *dsos__findnew(struct dsos *dsos, const char *name)
{
struct dso *dso;
- pthread_rwlock_wrlock(&dsos->lock);
+ down_write(&dsos->lock);
dso = dso__get(__dsos__findnew(dsos, name));
- pthread_rwlock_unlock(&dsos->lock);
+ up_write(&dsos->lock);
return dso;
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index f886141678eb..a2bbb21f301c 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -6,7 +6,7 @@
#include <linux/rbtree.h>
#include <sys/types.h>
#include <stdbool.h>
-#include <pthread.h>
+#include "rwsem.h"
#include <linux/types.h>
#include <linux/bitops.h>
#include "map.h"
@@ -129,7 +129,7 @@ struct dso_cache {
struct dsos {
struct list_head head;
struct rb_root root; /* rbtree root sorted by long name */
- pthread_rwlock_t lock;
+ struct rw_semaphore lock;
};
struct auxtrace_cache;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 1c905ba3641b..10366b87d0b5 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,5 +1,6 @@
#include <dirent.h>
#include <errno.h>
+#include <fcntl.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -683,12 +684,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
bool mmap_data,
unsigned int proc_map_timeout)
{
- DIR *proc;
- char proc_path[PATH_MAX];
- struct dirent *dirent;
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
+ char proc_path[PATH_MAX];
+ struct dirent **dirent;
int err = -1;
+ char *end;
+ pid_t pid;
+ int n, i;
if (machine__is_default_guest(machine))
return 0;
@@ -712,29 +715,32 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
goto out_free_fork;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
- proc = opendir(proc_path);
+ n = scandir(proc_path, &dirent, 0, alphasort);
- if (proc == NULL)
+ if (n < 0)
goto out_free_namespaces;
- while ((dirent = readdir(proc)) != NULL) {
- char *end;
- pid_t pid = strtol(dirent->d_name, &end, 10);
-
- if (*end) /* only interested in proper numerical dirents */
+ for (i = 0; i < n; i++) {
+ if (!isdigit(dirent[i]->d_name[0]))
continue;
- /*
- * We may race with exiting thread, so don't stop just because
- * one thread couldn't be synthesized.
- */
- __event__synthesize_thread(comm_event, mmap_event, fork_event,
- namespaces_event, pid, 1, process,
- tool, machine, mmap_data,
- proc_map_timeout);
- }
+ pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+ /* only interested in proper numerical dirents */
+ if (!*end) {
+ /*
+ * We may race with exiting thread, so don't stop just because
+ * one thread couldn't be synthesized.
+ */
+ __event__synthesize_thread(comm_event, mmap_event, fork_event,
+ namespaces_event, pid, 1, process,
+ tool, machine, mmap_data,
+ proc_map_timeout);
+ }
+ free(dirent[i]);
+ }
+ free(dirent);
err = 0;
- closedir(proc);
+
out_free_namespaces:
free(namespaces_event);
out_free_fork:
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index bf2c4936e35f..b1c14f1fdc27 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/list.h>
#include <api/fd/array.h>
+#include <fcntl.h>
#include <stdio.h>
#include "../perf.h"
#include "event.h"
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4bb89373eb52..7389746c0dc4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -678,7 +678,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
- attr->sample_regs_user = PERF_REGS_MASK;
+ attr->sample_regs_user |= PERF_REGS_MASK;
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
@@ -931,6 +931,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
perf_evsel__set_sample_bit(evsel, REGS_INTR);
}
+ if (opts->sample_user_regs) {
+ attr->sample_regs_user |= opts->sample_user_regs;
+ perf_evsel__set_sample_bit(evsel, REGS_USER);
+ }
+
if (target__has_cpu(&opts->target) || opts->sample_cpu)
perf_evsel__set_sample_bit(evsel, CPU);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dd2c4b5112a5..db658785d828 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -137,6 +137,7 @@ struct perf_evsel {
const char * metric_name;
struct perf_evsel **metric_events;
bool collect_stat;
+ bool weak_group;
};
union u64_swap {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index df709363ef69..585b4a3d64a4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -30,7 +30,21 @@ static void dsos__init(struct dsos *dsos)
{
INIT_LIST_HEAD(&dsos->head);
dsos->root = RB_ROOT;
- pthread_rwlock_init(&dsos->lock, NULL);
+ init_rwsem(&dsos->lock);
+}
+
+static void machine__threads_init(struct machine *machine)
+{
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ threads->entries = RB_ROOT;
+ init_rwsem(&threads->lock);
+ threads->nr = 0;
+ INIT_LIST_HEAD(&threads->dead);
+ threads->last_match = NULL;
+ }
}
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
@@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
- machine->threads = RB_ROOT;
- pthread_rwlock_init(&machine->threads_lock, NULL);
- machine->nr_threads = 0;
- INIT_LIST_HEAD(&machine->dead_threads);
- machine->last_match = NULL;
+ machine__threads_init(machine);
machine->vdso_info = NULL;
machine->env = NULL;
@@ -120,7 +130,7 @@ static void dsos__purge(struct dsos *dsos)
{
struct dso *pos, *n;
- pthread_rwlock_wrlock(&dsos->lock);
+ down_write(&dsos->lock);
list_for_each_entry_safe(pos, n, &dsos->head, node) {
RB_CLEAR_NODE(&pos->rb_node);
@@ -129,39 +139,49 @@ static void dsos__purge(struct dsos *dsos)
dso__put(pos);
}
- pthread_rwlock_unlock(&dsos->lock);
+ up_write(&dsos->lock);
}
static void dsos__exit(struct dsos *dsos)
{
dsos__purge(dsos);
- pthread_rwlock_destroy(&dsos->lock);
+ exit_rwsem(&dsos->lock);
}
void machine__delete_threads(struct machine *machine)
{
struct rb_node *nd;
+ int i;
- pthread_rwlock_wrlock(&machine->threads_lock);
- nd = rb_first(&machine->threads);
- while (nd) {
- struct thread *t = rb_entry(nd, struct thread, rb_node);
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ down_write(&threads->lock);
+ nd = rb_first(&threads->entries);
+ while (nd) {
+ struct thread *t = rb_entry(nd, struct thread, rb_node);
- nd = rb_next(nd);
- __machine__remove_thread(machine, t, false);
+ nd = rb_next(nd);
+ __machine__remove_thread(machine, t, false);
+ }
+ up_write(&threads->lock);
}
- pthread_rwlock_unlock(&machine->threads_lock);
}
void machine__exit(struct machine *machine)
{
+ int i;
+
machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->current_tid);
- pthread_rwlock_destroy(&machine->threads_lock);
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ exit_rwsem(&threads->lock);
+ }
}
void machine__delete(struct machine *machine)
@@ -379,10 +399,11 @@ out_err:
* lookup/new thread inserted.
*/
static struct thread *____machine__findnew_thread(struct machine *machine,
+ struct threads *threads,
pid_t pid, pid_t tid,
bool create)
{
- struct rb_node **p = &machine->threads.rb_node;
+ struct rb_node **p = &threads->entries.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
@@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* so most of the time we dont have to look up
* the full rbtree:
*/
- th = machine->last_match;
+ th = threads->last_match;
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
- machine->last_match = NULL;
+ threads->last_match = NULL;
}
while (*p != NULL) {
@@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = rb_entry(parent, struct thread, rb_node);
if (th->tid == tid) {
- machine->last_match = th;
+ threads->last_match = th;
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
@@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &machine->threads);
+ rb_insert_color(&th->rb_node, &threads->entries);
/*
* We have to initialize map_groups separately
@@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &machine->threads);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* It is now in the rbtree, get a ref
*/
thread__get(th);
- machine->last_match = th;
- ++machine->nr_threads;
+ threads->last_match = th;
+ ++threads->nr;
}
return th;
@@ -452,27 +473,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
{
- return ____machine__findnew_thread(machine, pid, tid, true);
+ return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
}
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct threads *threads = machine__threads(machine, tid);
struct thread *th;
- pthread_rwlock_wrlock(&machine->threads_lock);
+ down_write(&threads->lock);
th = __machine__findnew_thread(machine, pid, tid);
- pthread_rwlock_unlock(&machine->threads_lock);
+ up_write(&threads->lock);
return th;
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct threads *threads = machine__threads(machine, tid);
struct thread *th;
- pthread_rwlock_rdlock(&machine->threads_lock);
- th = ____machine__findnew_thread(machine, pid, tid, false);
- pthread_rwlock_unlock(&machine->threads_lock);
+
+ down_read(&threads->lock);
+ th = ____machine__findnew_thread(machine, threads, pid, tid, false);
+ up_read(&threads->lock);
return th;
}
@@ -564,7 +588,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
{
struct dso *dso;
- pthread_rwlock_wrlock(&machine->dsos.lock);
+ down_write(&machine->dsos.lock);
dso = __dsos__find(&machine->dsos, m->name, true);
if (!dso) {
@@ -578,7 +602,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__get(dso);
out_unlock:
- pthread_rwlock_unlock(&machine->dsos.lock);
+ up_write(&machine->dsos.lock);
return dso;
}
@@ -719,21 +743,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- size_t ret;
struct rb_node *nd;
+ size_t ret;
+ int i;
- pthread_rwlock_rdlock(&machine->threads_lock);
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
- ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+ down_read(&threads->lock);
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- struct thread *pos = rb_entry(nd, struct thread, rb_node);
+ ret = fprintf(fp, "Threads: %u\n", threads->nr);
- ret += thread__fprintf(pos, fp);
- }
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread, rb_node);
- pthread_rwlock_unlock(&machine->threads_lock);
+ ret += thread__fprintf(pos, fp);
+ }
+ up_read(&threads->lock);
+ }
return ret;
}
@@ -1292,7 +1320,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
struct dso *kernel = NULL;
struct dso *dso;
- pthread_rwlock_rdlock(&machine->dsos.lock);
+ down_read(&machine->dsos.lock);
list_for_each_entry(dso, &machine->dsos.head, node) {
@@ -1322,7 +1350,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
break;
}
- pthread_rwlock_unlock(&machine->dsos.lock);
+ up_read(&machine->dsos.lock);
if (kernel == NULL)
kernel = machine__findnew_dso(machine, kmmap_prefix);
@@ -1479,23 +1507,25 @@ out_problem:
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
- if (machine->last_match == th)
- machine->last_match = NULL;
+ struct threads *threads = machine__threads(machine, th->tid);
+
+ if (threads->last_match == th)
+ threads->last_match = NULL;
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
- pthread_rwlock_wrlock(&machine->threads_lock);
- rb_erase_init(&th->rb_node, &machine->threads);
+ down_write(&threads->lock);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
- --machine->nr_threads;
+ --threads->nr;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
- list_add_tail(&th->node, &machine->dead_threads);
+ list_add_tail(&th->node, &threads->dead);
if (lock)
- pthread_rwlock_unlock(&machine->threads_lock);
+ up_write(&threads->lock);
thread__put(th);
}
@@ -2140,21 +2170,26 @@ int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv)
{
+ struct threads *threads;
struct rb_node *nd;
struct thread *thread;
int rc = 0;
+ int i;
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- thread = rb_entry(nd, struct thread, rb_node);
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
- }
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ threads = &machine->threads[i];
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ thread = rb_entry(nd, struct thread, rb_node);
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
- list_for_each_entry(thread, &machine->dead_threads, node) {
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
+ list_for_each_entry(thread, &threads->dead, node) {
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
}
return rc;
}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 3cdb1340f917..b1cd516f2025 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -6,6 +6,7 @@
#include "map.h"
#include "dso.h"
#include "event.h"
+#include "rwsem.h"
struct addr_location;
struct branch_stack;
@@ -23,6 +24,17 @@ extern const char *ref_reloc_sym_names[];
struct vdso_info;
+#define THREADS__TABLE_BITS 8
+#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
+
+struct threads {
+ struct rb_root entries;
+ struct rw_semaphore lock;
+ unsigned int nr;
+ struct list_head dead;
+ struct thread *last_match;
+};
+
struct machine {
struct rb_node rb_node;
pid_t pid;
@@ -30,11 +42,7 @@ struct machine {
bool comm_exec;
bool kptr_restrict_warned;
char *root_dir;
- struct rb_root threads;
- pthread_rwlock_t threads_lock;
- unsigned int nr_threads;
- struct list_head dead_threads;
- struct thread *last_match;
+ struct threads threads[THREADS__TABLE_SIZE];
struct vdso_info *vdso_info;
struct perf_env *env;
struct dsos dsos;
@@ -48,6 +56,12 @@ struct machine {
};
};
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+ /* Cast it to handle tid == -1 */
+ return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
static inline
struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
{
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index bdaa0a4edc17..5792d7a78152 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -488,7 +488,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
static void maps__init(struct maps *maps)
{
maps->entries = RB_ROOT;
- pthread_rwlock_init(&maps->lock, NULL);
+ init_rwsem(&maps->lock);
}
void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -517,9 +517,9 @@ static void __maps__purge(struct maps *maps)
static void maps__exit(struct maps *maps)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__purge(maps);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
void map_groups__exit(struct map_groups *mg)
@@ -586,7 +586,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
struct symbol *sym;
struct rb_node *nd;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -602,7 +602,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
sym = NULL;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return sym;
}
@@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
size_t printed = 0;
struct rb_node *nd;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -650,7 +650,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
}
}
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return printed;
}
@@ -682,7 +682,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
struct rb_node *next;
int err = 0;
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
root = &maps->entries;
next = rb_first(root);
@@ -750,7 +750,7 @@ put_map:
err = 0;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
return err;
}
@@ -771,7 +771,7 @@ int map_groups__clone(struct thread *thread,
struct map *map;
struct maps *maps = &parent->maps[type];
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
struct map *new = map__clone(map);
@@ -788,7 +788,7 @@ int map_groups__clone(struct thread *thread,
err = 0;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return err;
}
@@ -815,9 +815,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
void maps__insert(struct maps *maps, struct map *map)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__insert(maps, map);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
static void __maps__remove(struct maps *maps, struct map *map)
@@ -828,9 +828,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
void maps__remove(struct maps *maps, struct map *map)
{
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
__maps__remove(maps, map);
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
struct map *maps__find(struct maps *maps, u64 ip)
@@ -838,7 +838,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
struct rb_node **p, *parent = NULL;
struct map *m;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
p = &maps->entries.rb_node;
while (*p != NULL) {
@@ -854,7 +854,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
m = NULL;
out:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return m;
}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 73aacf7a7dc4..d5d7442dac7a 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -9,6 +9,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <linux/types.h>
+#include "rwsem.h"
enum map_type {
MAP__FUNCTION = 0,
@@ -61,7 +62,7 @@ struct kmap {
struct maps {
struct rb_root entries;
- pthread_rwlock_t lock;
+ struct rw_semaphore lock;
};
struct map_groups {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
new file mode 100644
index 000000000000..0ddd9c199227
--- /dev/null
+++ b/tools/perf/util/metricgroup.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/* Manage metrics and groups of metrics from JSON files */
+
+#include "metricgroup.h"
+#include "evlist.h"
+#include "strbuf.h"
+#include "pmu.h"
+#include "expr.h"
+#include "rblist.h"
+#include "pmu.h"
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include "pmu-events/pmu-events.h"
+#include "strbuf.h"
+#include "strlist.h"
+#include <assert.h>
+#include <ctype.h>
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct perf_evsel *evsel,
+ bool create)
+{
+ struct rb_node *nd;
+ struct metric_event me = {
+ .evsel = evsel
+ };
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ if (create) {
+ rblist__add_node(metric_events, &me);
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ }
+ return NULL;
+}
+
+static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct metric_event *a = container_of(rb_node,
+ struct metric_event,
+ nd);
+ const struct metric_event *b = entry;
+
+ if (a->evsel == b->evsel)
+ return 0;
+ if ((char *)a->evsel < (char *)b->evsel)
+ return -1;
+ return +1;
+}
+
+static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ struct metric_event *me = malloc(sizeof(struct metric_event));
+
+ if (!me)
+ return NULL;
+ memcpy(me, entry, sizeof(struct metric_event));
+ me->evsel = ((struct metric_event *)entry)->evsel;
+ INIT_LIST_HEAD(&me->head);
+ return &me->nd;
+}
+
+static void metricgroup__rblist_init(struct rblist *metric_events)
+{
+ rblist__init(metric_events);
+ metric_events->node_cmp = metric_event_cmp;
+ metric_events->node_new = metric_event_new;
+}
+
+struct egroup {
+ struct list_head nd;
+ int idnum;
+ const char **ids;
+ const char *metric_name;
+ const char *metric_expr;
+};
+
+static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist,
+ const char **ids,
+ int idnum,
+ struct perf_evsel **metric_events)
+{
+ struct perf_evsel *ev, *start = NULL;
+ int ind = 0;
+
+ evlist__for_each_entry (perf_evlist, ev) {
+ if (!strcmp(ev->name, ids[ind])) {
+ metric_events[ind] = ev;
+ if (ind == 0)
+ start = ev;
+ if (++ind == idnum) {
+ metric_events[ind] = NULL;
+ return start;
+ }
+ } else {
+ ind = 0;
+ start = NULL;
+ }
+ }
+ /*
+ * This can happen when an alias expands to multiple
+ * events, like for uncore events.
+ * We don't support this case for now.
+ */
+ return NULL;
+}
+
+static int metricgroup__setup_events(struct list_head *groups,
+ struct perf_evlist *perf_evlist,
+ struct rblist *metric_events_list)
+{
+ struct metric_event *me;
+ struct metric_expr *expr;
+ int i = 0;
+ int ret = 0;
+ struct egroup *eg;
+ struct perf_evsel *evsel;
+
+ list_for_each_entry (eg, groups, nd) {
+ struct perf_evsel **metric_events;
+
+ metric_events = calloc(sizeof(void *), eg->idnum + 1);
+ if (!metric_events) {
+ ret = -ENOMEM;
+ break;
+ }
+ evsel = find_evsel(perf_evlist, eg->ids, eg->idnum,
+ metric_events);
+ if (!evsel) {
+ pr_debug("Cannot resolve %s: %s\n",
+ eg->metric_name, eg->metric_expr);
+ continue;
+ }
+ for (i = 0; i < eg->idnum; i++)
+ metric_events[i]->collect_stat = true;
+ me = metricgroup__lookup(metric_events_list, evsel, true);
+ if (!me) {
+ ret = -ENOMEM;
+ break;
+ }
+ expr = malloc(sizeof(struct metric_expr));
+ if (!expr) {
+ ret = -ENOMEM;
+ break;
+ }
+ expr->metric_expr = eg->metric_expr;
+ expr->metric_name = eg->metric_name;
+ expr->metric_events = metric_events;
+ list_add(&expr->nd, &me->head);
+ }
+ return ret;
+}
+
+static bool match_metric(const char *n, const char *list)
+{
+ int len;
+ char *m;
+
+ if (!list)
+ return false;
+ if (!strcmp(list, "all"))
+ return true;
+ if (!n)
+ return !strcasecmp(list, "No_group");
+ len = strlen(list);
+ m = strcasestr(n, list);
+ if (!m)
+ return false;
+ if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
+ (m[len] == 0 || m[len] == ';'))
+ return true;
+ return false;
+}
+
+struct mep {
+ struct rb_node nd;
+ const char *name;
+ struct strlist *metrics;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct mep *a = container_of(rb_node, struct mep, nd);
+ struct mep *b = (struct mep *)entry;
+
+ return strcmp(a->name, b->name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
+ const void *entry)
+{
+ struct mep *me = malloc(sizeof(struct mep));
+
+ if (!me)
+ return NULL;
+ memcpy(me, entry, sizeof(struct mep));
+ me->name = strdup(me->name);
+ if (!me->name)
+ goto out_me;
+ me->metrics = strlist__new(NULL, NULL);
+ if (!me->metrics)
+ goto out_name;
+ return &me->nd;
+out_name:
+ free((char *)me->name);
+out_me:
+ free(me);
+ return NULL;
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *name)
+{
+ struct rb_node *nd;
+ struct mep me = {
+ .name = name
+ };
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ rblist__add_node(groups, &me);
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ return NULL;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+ struct rb_node *nd)
+{
+ struct mep *me = container_of(nd, struct mep, nd);
+
+ strlist__delete(me->metrics);
+ free((void *)me->name);
+ free(me);
+}
+
+static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
+{
+ struct str_node *sn;
+ int n = 0;
+
+ strlist__for_each_entry (sn, metrics) {
+ if (raw)
+ printf("%s%s", n > 0 ? " " : "", sn->s);
+ else
+ printf(" %s\n", sn->s);
+ n++;
+ }
+ if (raw)
+ putchar('\n');
+}
+
+void metricgroup__print(bool metrics, bool metricgroups, char *filter,
+ bool raw)
+{
+ struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_event *pe;
+ int i;
+ struct rblist groups;
+ struct rb_node *node, *next;
+ struct strlist *metriclist = NULL;
+
+ if (!map)
+ return;
+
+ if (!metricgroups) {
+ metriclist = strlist__new(NULL, NULL);
+ if (!metriclist)
+ return;
+ }
+
+ rblist__init(&groups);
+ groups.node_new = mep_new;
+ groups.node_cmp = mep_cmp;
+ groups.node_delete = mep_delete;
+ for (i = 0; ; i++) {
+ const char *g;
+ pe = &map->table[i];
+
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ g = pe->metric_group;
+ if (!g && pe->metric_name) {
+ if (pe->name)
+ continue;
+ g = "No_group";
+ }
+ if (g) {
+ char *omg;
+ char *mg = strdup(g);
+
+ if (!mg)
+ return;
+ omg = mg;
+ while ((g = strsep(&mg, ";")) != NULL) {
+ struct mep *me;
+ char *s;
+
+ if (*g == 0)
+ g = "No_group";
+ while (isspace(*g))
+ g++;
+ if (filter && !strstr(g, filter))
+ continue;
+ if (raw)
+ s = (char *)pe->metric_name;
+ else {
+ if (asprintf(&s, "%s\n\t[%s]",
+ pe->metric_name, pe->desc) < 0)
+ return;
+ }
+
+ if (!s)
+ continue;
+
+ if (!metricgroups) {
+ strlist__add(metriclist, s);
+ } else {
+ me = mep_lookup(&groups, g);
+ if (!me)
+ continue;
+ strlist__add(me->metrics, s);
+ }
+ }
+ free(omg);
+ }
+ }
+
+ if (metricgroups && !raw)
+ printf("\nMetric Groups:\n\n");
+ else if (metrics && !raw)
+ printf("\nMetrics:\n\n");
+
+ for (node = rb_first(&groups.entries); node; node = next) {
+ struct mep *me = container_of(node, struct mep, nd);
+
+ if (metricgroups)
+ printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n");
+ if (metrics)
+ metricgroup__print_strlist(me->metrics, raw);
+ next = rb_next(node);
+ rblist__remove_node(&groups, node);
+ }
+ if (!metricgroups)
+ metricgroup__print_strlist(metriclist, raw);
+ strlist__delete(metriclist);
+}
+
+static int metricgroup__add_metric(const char *metric, struct strbuf *events,
+ struct list_head *group_list)
+{
+ struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_event *pe;
+ int ret = -EINVAL;
+ int i, j;
+
+ if (!map)
+ return 0;
+
+ for (i = 0; ; i++) {
+ pe = &map->table[i];
+
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ if (match_metric(pe->metric_group, metric) ||
+ match_metric(pe->metric_name, metric)) {
+ const char **ids;
+ int idnum;
+ struct egroup *eg;
+
+ pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+ if (expr__find_other(pe->metric_expr,
+ NULL, &ids, &idnum) < 0)
+ continue;
+ if (events->len > 0)
+ strbuf_addf(events, ",");
+ for (j = 0; j < idnum; j++) {
+ pr_debug("found event %s\n", ids[j]);
+ strbuf_addf(events, "%s%s",
+ j == 0 ? "{" : ",",
+ ids[j]);
+ }
+ strbuf_addf(events, "}:W");
+
+ eg = malloc(sizeof(struct egroup));
+ if (!eg) {
+ ret = -ENOMEM;
+ break;
+ }
+ eg->ids = ids;
+ eg->idnum = idnum;
+ eg->metric_name = pe->metric_name;
+ eg->metric_expr = pe->metric_expr;
+ list_add_tail(&eg->nd, group_list);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
+ struct list_head *group_list)
+{
+ char *llist, *nlist, *p;
+ int ret = -EINVAL;
+
+ nlist = strdup(list);
+ if (!nlist)
+ return -ENOMEM;
+ llist = nlist;
+
+ strbuf_init(events, 100);
+ strbuf_addf(events, "%s", "");
+
+ while ((p = strsep(&llist, ",")) != NULL) {
+ ret = metricgroup__add_metric(p, events, group_list);
+ if (ret == -EINVAL) {
+ fprintf(stderr, "Cannot find metric or group `%s'\n",
+ p);
+ break;
+ }
+ }
+ free(nlist);
+ return ret;
+}
+
+static void metricgroup__free_egroups(struct list_head *group_list)
+{
+ struct egroup *eg, *egtmp;
+ int i;
+
+ list_for_each_entry_safe (eg, egtmp, group_list, nd) {
+ for (i = 0; i < eg->idnum; i++)
+ free((char *)eg->ids[i]);
+ free(eg->ids);
+ free(eg);
+ }
+}
+
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ struct rblist *metric_events)
+{
+ struct parse_events_error parse_error;
+ struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value;
+ struct strbuf extra_events;
+ LIST_HEAD(group_list);
+ int ret;
+
+ if (metric_events->nr_entries == 0)
+ metricgroup__rblist_init(metric_events);
+ ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+ if (ret)
+ return ret;
+ pr_debug("adding %s\n", extra_events.buf);
+ memset(&parse_error, 0, sizeof(struct parse_events_error));
+ ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+ if (ret) {
+ parse_events_print_error(&parse_error, extra_events.buf);
+ goto out;
+ }
+ strbuf_release(&extra_events);
+ ret = metricgroup__setup_events(&group_list, perf_evlist,
+ metric_events);
+out:
+ metricgroup__free_egroups(&group_list);
+ return ret;
+}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
new file mode 100644
index 000000000000..06854e125ee7
--- /dev/null
+++ b/tools/perf/util/metricgroup.h
@@ -0,0 +1,31 @@
+#ifndef METRICGROUP_H
+#define METRICGROUP_H 1
+
+#include "linux/list.h"
+#include "rblist.h"
+#include <subcmd/parse-options.h>
+#include "evlist.h"
+#include "strbuf.h"
+
+struct metric_event {
+ struct rb_node nd;
+ struct perf_evsel *evsel;
+ struct list_head head; /* list of metric_expr */
+};
+
+struct metric_expr {
+ struct list_head nd;
+ const char *metric_expr;
+ const char *metric_name;
+ struct perf_evsel **metric_events;
+};
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct perf_evsel *evsel,
+ bool create);
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ struct rblist *metric_events);
+
+void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
+#endif
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index a58e91197729..5be021701f34 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -11,6 +11,7 @@
#include "event.h"
#include <sys/types.h>
#include <sys/stat.h>
+#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <stdlib.h>
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f6257fb4f08c..9183913a6174 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -28,6 +28,7 @@
#include "probe-file.h"
#include "asm/bug.h"
#include "util/parse-branch-options.h"
+#include "metricgroup.h"
#define MAX_NAME_LEN 100
@@ -1218,11 +1219,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
struct perf_pmu_info info;
struct perf_pmu *pmu;
struct perf_evsel *evsel;
+ struct parse_events_error *err = parse_state->error;
LIST_HEAD(config_terms);
pmu = perf_pmu__find(name);
- if (!pmu)
+ if (!pmu) {
+ if (asprintf(&err->str,
+ "Cannot find PMU `%s'. Missing kernel support?",
+ name) < 0)
+ err->str = NULL;
return -EINVAL;
+ }
if (pmu->default_config) {
memcpy(&attr, pmu->default_config,
@@ -1366,6 +1373,7 @@ struct event_modifier {
int exclude_GH;
int sample_read;
int pinned;
+ int weak;
};
static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -1384,6 +1392,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
int exclude = eu | ek | eh;
int exclude_GH = evsel ? evsel->exclude_GH : 0;
+ int weak = 0;
memset(mod, 0, sizeof(*mod));
@@ -1421,6 +1430,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
sample_read = 1;
} else if (*str == 'D') {
pinned = 1;
+ } else if (*str == 'W') {
+ weak = 1;
} else
break;
@@ -1451,6 +1462,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
mod->exclude_GH = exclude_GH;
mod->sample_read = sample_read;
mod->pinned = pinned;
+ mod->weak = weak;
return 0;
}
@@ -1464,7 +1476,7 @@ static int check_modifier(char *str)
char *p = str;
/* The sizeof includes 0 byte as well. */
- if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1))
+ if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
return -1;
while (*p) {
@@ -1504,6 +1516,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
evsel->exclude_GH = mod.exclude_GH;
evsel->sample_read = mod.sample_read;
evsel->precise_max = mod.precise_max;
+ evsel->weak_group = mod.weak;
if (perf_evsel__is_group_leader(evsel))
evsel->attr.pinned = mod.pinned;
@@ -1726,8 +1739,8 @@ static int get_term_width(void)
return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
}
-static void parse_events_print_error(struct parse_events_error *err,
- const char *event)
+void parse_events_print_error(struct parse_events_error *err,
+ const char *event)
{
const char *str = "invalid or unsupported event: ";
char _buf[MAX_WIDTH];
@@ -1782,8 +1795,6 @@ static void parse_events_print_error(struct parse_events_error *err,
zfree(&err->str);
zfree(&err->help);
}
-
- fprintf(stderr, "Run 'perf list' for a list of valid events\n");
}
#undef MAX_WIDTH
@@ -1795,8 +1806,10 @@ int parse_events_option(const struct option *opt, const char *str,
struct parse_events_error err = { .idx = 0, };
int ret = parse_events(evlist, str, &err);
- if (ret)
+ if (ret) {
parse_events_print_error(&err, str);
+ fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+ }
return ret;
}
@@ -2374,6 +2387,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_tracepoint_events(NULL, NULL, name_only);
print_sdt_events(NULL, NULL, name_only);
+
+ metricgroup__print(true, true, NULL, name_only);
}
int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 635135125111..3909ca0639f2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -202,6 +202,9 @@ int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);
+void parse_events_print_error(struct parse_events_error *err,
+ const char *event);
+
#ifdef HAVE_LIBELF_SUPPORT
/*
* If the probe point starts with '%',
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index c42edeac451f..ea2426daf7e8 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -161,7 +161,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/* If you add a modifier you need to update check_modifier() */
-modifier_event [ukhpPGHSDI]+
+modifier_event [ukhpPGHSDIW]+
modifier_bp [rwx]{1,3}
%%
@@ -288,6 +288,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
/*
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ac16a9db1fb5..0b11dfc0af44 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -2,6 +2,7 @@
#include <linux/compiler.h>
#include <sys/types.h>
#include <errno.h>
+#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
@@ -516,16 +517,8 @@ char * __weak get_cpuid_str(void)
return NULL;
}
-/*
- * From the pmu_events_map, find the table of PMU events that corresponds
- * to the current running CPU. Then, add all PMU events from that table
- * as aliases.
- */
-static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+static char *perf_pmu__getcpuid(void)
{
- int i;
- struct pmu_events_map *map;
- struct pmu_event *pe;
char *cpuid;
static bool printed;
@@ -535,22 +528,50 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
if (!cpuid)
cpuid = get_cpuid_str();
if (!cpuid)
- return;
+ return NULL;
if (!printed) {
pr_debug("Using CPUID %s\n", cpuid);
printed = true;
}
+ return cpuid;
+}
+
+struct pmu_events_map *perf_pmu__find_map(void)
+{
+ struct pmu_events_map *map;
+ char *cpuid = perf_pmu__getcpuid();
+ int i;
i = 0;
- while (1) {
+ for (;;) {
map = &pmu_events_map[i++];
- if (!map->table)
- goto out;
+ if (!map->table) {
+ map = NULL;
+ break;
+ }
if (!strcmp(map->cpuid, cpuid))
break;
}
+ free(cpuid);
+ return map;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+{
+ int i;
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+
+ map = perf_pmu__find_map();
+ if (!map)
+ return;
/*
* Found a matching PMU events table. Create aliases
@@ -560,8 +581,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
const char *pname;
pe = &map->table[i++];
- if (!pe->name)
+ if (!pe->name) {
+ if (pe->metric_group || pe->metric_name)
+ continue;
break;
+ }
pname = pe->pmu ? pe->pmu : "cpu";
if (strncmp(pname, name, strlen(pname)))
@@ -575,9 +599,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
(char *)pe->metric_expr,
(char *)pe->metric_name);
}
-
-out:
- free(cpuid);
}
struct perf_event_attr * __weak
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 389e9729331f..060f6abba8ed 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -90,4 +90,6 @@ int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+struct pmu_events_map *perf_pmu__find_map(void);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index cdf8d83a484c..4ae1123c6794 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -15,6 +15,7 @@
*
*/
#include <errno.h>
+#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 808cc45611fe..b30746f5f613 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
__ilist->rblist.nr_entries)
/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \
- DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \
+ __machine->threads[hash_bucket].nr)
#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000000..5e52e7baa7b6
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,32 @@
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int exit_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_destroy(&sem->lock);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000000..94565ad4d494
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,19 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+int exit_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a7ebd9fe8e40..ceac0848469d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1847,7 +1847,7 @@ static int __perf_session__process_events(struct perf_session *session,
if (data_offset + data_size < file_size)
file_size = data_offset + data_size;
- ui_progress__init(&prog, file_size, "Processing events...");
+ ui_progress__init_size(&prog, file_size, "Processing events...");
mmap_size = MMAP_SIZE;
if (mmap_size > file_size) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index a04cf56d3517..a2c12d1ef32a 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -6,6 +6,7 @@
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
+#include "metricgroup.h"
enum {
CTX_BIT_USER = 1 << 0,
@@ -55,7 +56,6 @@ struct saved_value {
struct rb_node rb_node;
struct perf_evsel *evsel;
int cpu;
- int ctx;
struct stats stats;
};
@@ -66,8 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->ctx != b->ctx)
- return a->ctx - b->ctx;
if (a->cpu != b->cpu)
return a->cpu - b->cpu;
if (a->evsel == b->evsel)
@@ -89,13 +87,12 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
- int cpu, int ctx,
+ int cpu,
bool create)
{
struct rb_node *nd;
struct saved_value dm = {
.cpu = cpu,
- .ctx = ctx,
.evsel = evsel,
};
nd = rblist__find(&runtime_saved_values, &dm);
@@ -231,8 +228,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
if (counter->collect_stat) {
- struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
- true);
+ struct saved_value *v = saved_value_lookup(counter, cpu, true);
update_stats(&v->stats, count[0]);
}
}
@@ -627,15 +623,68 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel,
out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
+static void generic_metric(const char *metric_expr,
+ struct perf_evsel **metric_events,
+ char *name,
+ const char *metric_name,
+ double avg,
+ int cpu,
+ struct perf_stat_output_ctx *out)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct parse_ctx pctx;
+ double ratio;
+ int i;
+ void *ctxp = out->ctx;
+
+ expr__ctx_init(&pctx);
+ expr__add_id(&pctx, name, avg);
+ for (i = 0; metric_events[i]; i++) {
+ struct saved_value *v;
+ struct stats *stats;
+ double scale;
+
+ if (!strcmp(metric_events[i]->name, "duration_time")) {
+ stats = &walltime_nsecs_stats;
+ scale = 1e-9;
+ } else {
+ v = saved_value_lookup(metric_events[i], cpu, false);
+ if (!v)
+ break;
+ stats = &v->stats;
+ scale = 1.0;
+ }
+ expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
+ }
+ if (!metric_events[i]) {
+ const char *p = metric_expr;
+
+ if (expr__parse(&ratio, &pctx, &p) == 0)
+ print_metric(ctxp, NULL, "%8.1f",
+ metric_name ?
+ metric_name :
+ out->force_header ? name : "",
+ ratio);
+ else
+ print_metric(ctxp, NULL, NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ } else
+ print_metric(ctxp, NULL, NULL, "", 0);
+}
+
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
- struct perf_stat_output_ctx *out)
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events)
{
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
const char *color = NULL;
int ctx = evsel_context(evsel);
+ struct metric_event *me;
+ int num = 1;
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
@@ -819,33 +868,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
else
print_metric(ctxp, NULL, NULL, name, 0);
} else if (evsel->metric_expr) {
- struct parse_ctx pctx;
- int i;
-
- expr__ctx_init(&pctx);
- expr__add_id(&pctx, evsel->name, avg);
- for (i = 0; evsel->metric_events[i]; i++) {
- struct saved_value *v;
-
- v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
- if (!v)
- break;
- expr__add_id(&pctx, evsel->metric_events[i]->name,
- avg_stats(&v->stats));
- }
- if (!evsel->metric_events[i]) {
- const char *p = evsel->metric_expr;
-
- if (expr__parse(&ratio, &pctx, &p) == 0)
- print_metric(ctxp, NULL, "%8.1f",
- evsel->metric_name ?
- evsel->metric_name :
- out->force_header ? evsel->name : "",
- ratio);
- else
- print_metric(ctxp, NULL, NULL, "", 0);
- } else
- print_metric(ctxp, NULL, NULL, "", 0);
+ generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
+ evsel->metric_name, avg, cpu, out);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
@@ -863,6 +887,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(cpu, evsel, out);
} else {
- print_metric(ctxp, NULL, NULL, NULL, 0);
+ num = 0;
}
+
+ if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
+ struct metric_expr *mexp;
+
+ list_for_each_entry (mexp, &me->head, nd) {
+ if (num++ > 0)
+ out->new_line(ctxp);
+ generic_metric(mexp->metric_expr, mexp->metric_events,
+ evsel->name, mexp->metric_name,
+ avg, cpu, out);
+ }
+ }
+ if (num == 0)
+ print_metric(ctxp, NULL, NULL, NULL, 0);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index eacaf958e19d..47915df346fb 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -91,9 +91,11 @@ struct perf_stat_output_ctx {
bool force_header;
};
+struct rblist;
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
- struct perf_stat_output_ctx *out);
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events);
void perf_stat__collect_metric_expr(struct perf_evlist *);
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5909ee4c7ade..066e38aa4063 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -226,7 +226,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
struct maps *maps = &mg->maps[type];
struct map *next, *curr;
- pthread_rwlock_wrlock(&maps->lock);
+ down_write(&maps->lock);
curr = maps__first(maps);
if (curr == NULL)
@@ -246,7 +246,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
curr->end = ~0ULL;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_write(&maps->lock);
}
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1671,7 +1671,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
struct maps *maps = &mg->maps[type];
struct map *map;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1681,7 +1681,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
map = NULL;
out_unlock:
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
return map;
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index aee9a42102ba..c09bdb509d82 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -264,7 +264,7 @@ static int __thread__prepare_access(struct thread *thread)
struct maps *maps = &thread->mg->maps[i];
struct map *map;
- pthread_rwlock_rdlock(&maps->lock);
+ down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
err = unwind__prepare_access(thread, map, &initialized);
@@ -272,7 +272,7 @@ static int __thread__prepare_access(struct thread *thread)
break;
}
- pthread_rwlock_unlock(&maps->lock);
+ up_read(&maps->lock);
}
return err;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index e7d60d05596d..d7f2113462fb 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -28,7 +28,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
-#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8a9a677f7576..40b425949aa3 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -27,7 +27,6 @@
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
-#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4c360daa4e24..97e0c8e26477 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -6,6 +6,7 @@
#include <sys/stat.h>
#include <sys/utsname.h>
#include <dirent.h>
+#include <fcntl.h>
#include <inttypes.h>
#include <signal.h>
#include <stdio.h>
@@ -22,6 +23,19 @@
/*
* XXX We need to find a better place for these things...
*/
+
+bool perf_singlethreaded = true;
+
+void perf_set_singlethreaded(void)
+{
+ perf_singlethreaded = true;
+}
+
+void perf_set_multithreaded(void)
+{
+ perf_singlethreaded = false;
+}
+
unsigned int page_size;
int cacheline_size;
@@ -174,7 +188,7 @@ out:
return err;
}
-int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
{
void *ptr;
loff_t pgoff;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index b136c271125f..6c7e6cc902bb 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -5,7 +5,6 @@
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
#define _DEFAULT_SOURCE 1
-#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
@@ -35,7 +34,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d);
int copyfile(const char *from, const char *to);
int copyfile_mode(const char *from, const char *to, mode_t mode);
int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
-int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
ssize_t readn(int fd, void *buf, size_t n);
ssize_t writen(int fd, const void *buf, size_t n);
@@ -64,4 +62,9 @@ int sched_getcpu(void);
int setns(int fd, int nstype);
#endif
+extern bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void);
+void perf_set_multithreaded(void);
+
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index d3c39eec89a8..f5f843d3c22f 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -319,7 +319,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
struct vdso_info *vdso_info;
struct dso *dso = NULL;
- pthread_rwlock_wrlock(&machine->dsos.lock);
+ down_write(&machine->dsos.lock);
if (!machine->vdso_info)
machine->vdso_info = vdso_info__new();
@@ -347,7 +347,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
out_unlock:
dso__get(dso);
- pthread_rwlock_unlock(&machine->dsos.lock);
+ up_write(&machine->dsos.lock);
return dso;
}
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 1329d843eb7b..7c1175310a12 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -1,3 +1,4 @@
+#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>