From 80c281fca252827facd05875b8d9d36d7aad0f8d Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Tue, 1 Oct 2024 13:15:04 +0100 Subject: tools/perf: Correctly calculate sample period for inherited SAMPLE_READ values Sample period calculation in deliver_sample_value is updated to calculate the per-thread period delta for events that are inherit + PERF_SAMPLE_READ. When the sampling event has this configuration, the read_format.id is used with the tid from the sample to lookup the storage of the previously accumulated counter total before calculating the delta. All existing valid configurations where read_format.value represents some global value continue to use just the read_format.id to locate the storage of the previously accumulated total. perf_sample_id is modified to support tracking per-thread values, along with the existing global per-id values. In the per-thread case, values are stored in a hash by tid within the perf_sample_id, and are dynamically allocated as the number is not known ahead of time. Signed-off-by: Ben Gainey Cc: james.clark@arm.com Link: https://lore.kernel.org/r/20241001121505.1009685-2-ben.gainey@arm.com Signed-off-by: Namhyung Kim --- tools/lib/perf/include/internal/evsel.h | 63 +++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) (limited to 'tools/lib/perf/include/internal/evsel.h') diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 5cd220a61962..ea78defa77d0 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. 
*/ + u64 period; + /* The TID that the value belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate through all the + * per_stream_periods + * @evsel: perf_evsel instance to iterate + * @tmp: struct perf_sample_id_period temp iterator + * @item: struct perf_sample_id_period iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,8 +60,32 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). 
+ */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { @@ -58,6 +108,10 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ -- cgit v1.2.3