summaryrefslogtreecommitdiffstats
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 00:31:23 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-15 00:31:23 +0100
commit1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
treea391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/util/machine.c
parentMerge tag 'trace-ring-buffer-v6.8-rc7-2' of git://git.kernel.org/pub/scm/linu... (diff)
parentperf annotate: Add comments in the data structures (diff)
downloadlinux-1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa.tar.xz
linux-1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa.zip
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf stat: - Support new 'cluster' aggregation mode for shared resources depending on the hardware configuration: $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1 Performance counter stats for 'system wide': S0-D0-CLS0 2 85,051,822 cycles S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle S0-D0-CLS2 2 93,365,918 cycles S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle S0-D0-CLS4 2 104,157,523 cycles S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle S0-D0-CLS6 2 65,891,079 cycles S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle 1.002407989 seconds time elapsed - Various fixes and cleanups for event metrics including NaN handling perf script: - Use libcapstone if available to disassemble the instructions. This enables 'perf script -F disasm' and 'perf script --insn-trace=disasm' (for Intel-PT): $ perf script -F event,ip,disasm cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa9839d25 movq %rax, %r14 cycles:P: ffffffffa9cdcaf0 endbr64 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax cycles:P: ffffffffa988d428 wrmsr - Expose sample ID / stream ID to python scripts perf test: - Add more perf test cases from Redhat internal test suites. This time it adds the base infra and a few perf probe tests. More to come. :) - Add 'perf test -p' for parallel execution and fix some issues found by the parallel test - Support symbol test to print symbols in given (active) module: $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --- start --- Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko Overlapping symbols: 7a990-7a9a0 l __pfx_ext4_exit_fs 7a990-7a9a0 g __pfx_cleanup_module Overlapping symbols: 7a9a0-7aa1c l ext4_exit_fs 7a9a0-7aa1c g cleanup_module ... JSON metric updates: - A new round of Intel metric updates - Support Power11 PVR (compatible to Power10) - Fix cache latency events on Zen 4 to set SliceId properly Internal: - Fix reference counting for 'map' data structure, tireless work from Ian! - More memory optimization for struct thread and annotate histogram. Now, 'perf report' (TUI) and 'perf annotate' should be much lighter-weight in terms of memory footprint - Support cross-arch perf register access. Clean up the build configuration so that it can detect arch-register support at runtime. This can allow to parse register data in sample which was recorded in a different arch Others: - Sync task state in 'perf sched' to kernel using trace event fields. The task states have been changed so tools cannot assume a fixed encoding - Clean up 'perf mem' to generalize the arch-specific events - Add support for local and global variables to data type profiling. This would increase the success rate of type resolution with DWARF - Add short option -H for --hierarchy in 'perf report' and 'perf top'" * tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits) perf annotate: Add comments in the data structures perf annotate: Remove sym_hist.addr[] array perf annotate: Calculate instruction overhead using hashmap perf annotate: Add a hashmap for symbol histogram perf threads: Reduce table size from 256 to 8 perf threads: Switch from rbtree to hashmap perf threads: Move threads to its own files perf machine: Move machine's threads into its own abstraction perf machine: Move fprintf to for_each loop and a callback perf trace: Ignore thread hashing in summary perf report: Sort child tasks by tid perf vendor events amd: Fix Zen 4 cache latency events perf version: Display availability of OpenCSD support perf vendor events intel: Add umasks/occ_sel to PCU events. perf map: Fix map reference count issues libperf evlist: Avoid out-of-bounds access perf lock contention: Account contending locks too perf metrics: Fix segv for metrics with no events perf metrics: Fix metric matching perf pmu: Fix a potential memory leak in perf_pmu__lookup() ...
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c375
1 files changed, 102 insertions, 273 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b397a769006f..527517db3182 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -43,9 +43,6 @@
#include <linux/string.h>
#include <linux/zalloc.h>
-static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
- struct thread *th, bool lock);
-
static struct dso *machine__kernel_dso(struct machine *machine)
{
return map__dso(machine->vmlinux_map);
@@ -58,35 +55,6 @@ static void dsos__init(struct dsos *dsos)
init_rwsem(&dsos->lock);
}
-static void machine__threads_init(struct machine *machine)
-{
- int i;
-
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- struct threads *threads = &machine->threads[i];
- threads->entries = RB_ROOT_CACHED;
- init_rwsem(&threads->lock);
- threads->nr = 0;
- threads->last_match = NULL;
- }
-}
-
-static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd)
-{
- int to_find = (int) *((pid_t *)key);
-
- return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread);
-}
-
-static struct thread_rb_node *thread_rb_node__find(const struct thread *th,
- struct rb_root *tree)
-{
- pid_t to_find = thread__tid(th);
- struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid);
-
- return rb_entry(nd, struct thread_rb_node, rb_node);
-}
-
static int machine__set_mmap_name(struct machine *machine)
{
if (machine__is_host(machine))
@@ -120,7 +88,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
- machine__threads_init(machine);
+ threads__init(&machine->threads);
machine->vdso_info = NULL;
machine->env = NULL;
@@ -221,27 +189,11 @@ static void dsos__exit(struct dsos *dsos)
void machine__delete_threads(struct machine *machine)
{
- struct rb_node *nd;
- int i;
-
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- struct threads *threads = &machine->threads[i];
- down_write(&threads->lock);
- nd = rb_first_cached(&threads->entries);
- while (nd) {
- struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
-
- nd = rb_next(nd);
- __machine__remove_thread(machine, trb, trb->thread, false);
- }
- up_write(&threads->lock);
- }
+ threads__remove_all_threads(&machine->threads);
}
void machine__exit(struct machine *machine)
{
- int i;
-
if (machine == NULL)
return;
@@ -254,12 +206,7 @@ void machine__exit(struct machine *machine)
zfree(&machine->current_tid);
zfree(&machine->kallsyms_filename);
- machine__delete_threads(machine);
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- struct threads *threads = &machine->threads[i];
-
- exit_rwsem(&threads->lock);
- }
+ threads__exit(&machine->threads);
}
void machine__delete(struct machine *machine)
@@ -440,7 +387,7 @@ static struct thread *findnew_guest_code(struct machine *machine,
return NULL;
/* Assume maps are set up if there are any */
- if (maps__nr_maps(thread__maps(thread)))
+ if (!maps__empty(thread__maps(thread)))
return thread;
host_thread = machine__find_thread(host_machine, -1, pid);
@@ -526,7 +473,7 @@ static void machine__update_thread_pid(struct machine *machine,
if (thread__pid(th) == thread__tid(th))
return;
- leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
+ leader = machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
if (!leader)
goto out_err;
@@ -561,159 +508,55 @@ out_err:
}
/*
- * Front-end cache - TID lookups come in blocks,
- * so most of the time we dont have to look up
- * the full rbtree:
- */
-static struct thread*
-__threads__get_last_match(struct threads *threads, struct machine *machine,
- int pid, int tid)
-{
- struct thread *th;
-
- th = threads->last_match;
- if (th != NULL) {
- if (thread__tid(th) == tid) {
- machine__update_thread_pid(machine, th, pid);
- return thread__get(th);
- }
- thread__put(threads->last_match);
- threads->last_match = NULL;
- }
-
- return NULL;
-}
-
-static struct thread*
-threads__get_last_match(struct threads *threads, struct machine *machine,
- int pid, int tid)
-{
- struct thread *th = NULL;
-
- if (perf_singlethreaded)
- th = __threads__get_last_match(threads, machine, pid, tid);
-
- return th;
-}
-
-static void
-__threads__set_last_match(struct threads *threads, struct thread *th)
-{
- thread__put(threads->last_match);
- threads->last_match = thread__get(th);
-}
-
-static void
-threads__set_last_match(struct threads *threads, struct thread *th)
-{
- if (perf_singlethreaded)
- __threads__set_last_match(threads, th);
-}
-
-/*
* Caller must eventually drop thread->refcnt returned with a successful
* lookup/new thread inserted.
*/
-static struct thread *____machine__findnew_thread(struct machine *machine,
- struct threads *threads,
- pid_t pid, pid_t tid,
- bool create)
+static struct thread *__machine__findnew_thread(struct machine *machine,
+ pid_t pid,
+ pid_t tid,
+ bool create)
{
- struct rb_node **p = &threads->entries.rb_root.rb_node;
- struct rb_node *parent = NULL;
- struct thread *th;
- struct thread_rb_node *nd;
- bool leftmost = true;
+ struct thread *th = threads__find(&machine->threads, tid);
+ bool created;
- th = threads__get_last_match(threads, machine, pid, tid);
- if (th)
+ if (th) {
+ machine__update_thread_pid(machine, th, pid);
return th;
-
- while (*p != NULL) {
- parent = *p;
- th = rb_entry(parent, struct thread_rb_node, rb_node)->thread;
-
- if (thread__tid(th) == tid) {
- threads__set_last_match(threads, th);
- machine__update_thread_pid(machine, th, pid);
- return thread__get(th);
- }
-
- if (tid < thread__tid(th))
- p = &(*p)->rb_left;
- else {
- p = &(*p)->rb_right;
- leftmost = false;
- }
}
-
if (!create)
return NULL;
- th = thread__new(pid, tid);
- if (th == NULL)
- return NULL;
-
- nd = malloc(sizeof(*nd));
- if (nd == NULL) {
- thread__put(th);
- return NULL;
- }
- nd->thread = th;
-
- rb_link_node(&nd->rb_node, parent, p);
- rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost);
- /*
- * We have to initialize maps separately after rb tree is updated.
- *
- * The reason is that we call machine__findnew_thread within
- * thread__init_maps to find the thread leader and that would screwed
- * the rb tree.
- */
- if (thread__init_maps(th, machine)) {
- pr_err("Thread init failed thread %d\n", pid);
- rb_erase_cached(&nd->rb_node, &threads->entries);
- RB_CLEAR_NODE(&nd->rb_node);
- free(nd);
- thread__put(th);
- return NULL;
- }
- /*
- * It is now in the rbtree, get a ref
- */
- threads__set_last_match(threads, th);
- ++threads->nr;
-
- return thread__get(th);
-}
+ th = threads__findnew(&machine->threads, pid, tid, &created);
+ if (created) {
+ /*
+ * We have to initialize maps separately after rb tree is
+ * updated.
+ *
+ * The reason is that we call machine__findnew_thread within
+ * thread__init_maps to find the thread leader and that would
+ * screwed the rb tree.
+ */
+ if (thread__init_maps(th, machine)) {
+ pr_err("Thread init failed thread %d\n", pid);
+ threads__remove(&machine->threads, th);
+ thread__put(th);
+ return NULL;
+ }
+ } else
+ machine__update_thread_pid(machine, th, pid);
-struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
-{
- return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+ return th;
}
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
- pid_t tid)
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
{
- struct threads *threads = machine__threads(machine, tid);
- struct thread *th;
-
- down_write(&threads->lock);
- th = __machine__findnew_thread(machine, pid, tid);
- up_write(&threads->lock);
- return th;
+ return __machine__findnew_thread(machine, pid, tid, /*create=*/true);
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
- struct threads *threads = machine__threads(machine, tid);
- struct thread *th;
-
- down_read(&threads->lock);
- th = ____machine__findnew_thread(machine, threads, pid, tid, false);
- up_read(&threads->lock);
- return th;
+ return __machine__findnew_thread(machine, pid, tid, /*create=*/false);
}
/*
@@ -896,7 +739,6 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct symbol *sym;
struct dso *dso;
struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
- bool put_map = false;
int err = 0;
if (!map) {
@@ -913,12 +755,6 @@ static int machine__process_ksymbol_register(struct machine *machine,
err = -ENOMEM;
goto out;
}
- /*
- * The inserted map has a get on it, we need to put to release
- * the reference count here, but do it after all accesses are
- * done.
- */
- put_map = true;
if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
dso->binary_type = DSO_BINARY_TYPE__OOL;
dso->data.file_size = event->ksymbol.len;
@@ -952,8 +788,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
}
dso__insert_symbol(dso, sym);
out:
- if (put_map)
- map__put(map);
+ map__put(map);
return err;
}
@@ -977,7 +812,7 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
if (sym)
dso__delete_symbol(dso, sym);
}
-
+ map__put(map);
return 0;
}
@@ -1005,11 +840,11 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event,
perf_event__fprintf_text_poke(event, machine, stdout);
if (!event->text_poke.new_len)
- return 0;
+ goto out;
if (cpumode != PERF_RECORD_MISC_KERNEL) {
pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
- return 0;
+ goto out;
}
if (dso) {
@@ -1032,7 +867,8 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event,
pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
event->text_poke.addr);
}
-
+out:
+ map__put(map);
return 0;
}
@@ -1120,29 +956,30 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
return printed;
}
-size_t machine__fprintf(struct machine *machine, FILE *fp)
-{
- struct rb_node *nd;
- size_t ret;
- int i;
-
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- struct threads *threads = &machine->threads[i];
-
- down_read(&threads->lock);
+struct machine_fprintf_cb_args {
+ FILE *fp;
+ size_t printed;
+};
- ret = fprintf(fp, "Threads: %u\n", threads->nr);
+static int machine_fprintf_cb(struct thread *thread, void *data)
+{
+ struct machine_fprintf_cb_args *args = data;
- for (nd = rb_first_cached(&threads->entries); nd;
- nd = rb_next(nd)) {
- struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+ /* TODO: handle fprintf errors. */
+ args->printed += thread__fprintf(thread, args->fp);
+ return 0;
+}
- ret += thread__fprintf(pos, fp);
- }
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+ struct machine_fprintf_cb_args args = {
+ .fp = fp,
+ .printed = 0,
+ };
+ size_t ret = fprintf(fp, "Threads: %zu\n", threads__nr(&machine->threads));
- up_read(&threads->lock);
- }
- return ret;
+ machine__for_each_thread(machine, machine_fprintf_cb, &args);
+ return ret + args.printed;
}
static struct dso *machine__get_kernel(struct machine *machine)
@@ -1300,9 +1137,10 @@ static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data)
return 0;
dest_map = maps__find(args->kmaps, map__pgoff(map));
- if (dest_map != map)
+ if (RC_CHK_ACCESS(dest_map) != RC_CHK_ACCESS(map))
map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map)));
+ map__put(dest_map);
args->found = true;
return 0;
}
@@ -1543,8 +1381,10 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
return 0;
long_name = strdup(path);
- if (long_name == NULL)
+ if (long_name == NULL) {
+ map__put(map);
return -ENOMEM;
+ }
dso = map__dso(map);
dso__set_long_name(dso, long_name, true);
@@ -1558,7 +1398,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
dso->symtab_type++;
dso->comp = m->comp;
}
-
+ map__put(map);
return 0;
}
@@ -1765,8 +1605,10 @@ int machine__create_kernel_maps(struct machine *machine)
struct map *next = maps__find_next_entry(machine__kernel_maps(machine),
machine__kernel_map(machine));
- if (next)
+ if (next) {
machine__set_kernel_mmap(machine, start, map__start(next));
+ map__put(next);
+ }
}
out_put:
@@ -2060,36 +1902,9 @@ out_problem:
return 0;
}
-static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
- struct thread *th, bool lock)
-{
- struct threads *threads = machine__threads(machine, thread__tid(th));
-
- if (!nd)
- nd = thread_rb_node__find(th, &threads->entries.rb_root);
-
- if (threads->last_match && RC_CHK_EQUAL(threads->last_match, th))
- threads__set_last_match(threads, NULL);
-
- if (lock)
- down_write(&threads->lock);
-
- BUG_ON(refcount_read(thread__refcnt(th)) == 0);
-
- thread__put(nd->thread);
- rb_erase_cached(&nd->rb_node, &threads->entries);
- RB_CLEAR_NODE(&nd->rb_node);
- --threads->nr;
-
- free(nd);
-
- if (lock)
- up_write(&threads->lock);
-}
-
void machine__remove_thread(struct machine *machine, struct thread *th)
{
- return __machine__remove_thread(machine, NULL, th, true);
+ return threads__remove(&machine->threads, th);
}
int machine__process_fork_event(struct machine *machine, union perf_event *event,
@@ -3223,23 +3038,7 @@ int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv)
{
- struct threads *threads;
- struct rb_node *nd;
- int rc = 0;
- int i;
-
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- threads = &machine->threads[i];
- for (nd = rb_first_cached(&threads->entries); nd;
- nd = rb_next(nd)) {
- struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
-
- rc = fn(trb->thread, priv);
- if (rc != 0)
- return rc;
- }
- }
- return rc;
+ return threads__for_each_thread(&machine->threads, fn, priv);
}
int machines__for_each_thread(struct machines *machines,
@@ -3263,6 +3062,36 @@ int machines__for_each_thread(struct machines *machines,
return rc;
}
+
+static int thread_list_cb(struct thread *thread, void *data)
+{
+ struct list_head *list = data;
+ struct thread_list *entry = malloc(sizeof(*entry));
+
+ if (!entry)
+ return -ENOMEM;
+
+ entry->thread = thread__get(thread);
+ list_add_tail(&entry->list, list);
+ return 0;
+}
+
+int machine__thread_list(struct machine *machine, struct list_head *list)
+{
+ return machine__for_each_thread(machine, thread_list_cb, list);
+}
+
+void thread_list__delete(struct list_head *list)
+{
+ struct thread_list *pos, *next;
+
+ list_for_each_entry_safe(pos, next, list, list) {
+ thread__zput(pos->thread);
+ list_del(&pos->list);
+ free(pos);
+ }
+}
+
pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)