Diffstat (limited to 'arch')
-rw-r--r--   arch/x86/events/intel/core.c          103
-rw-r--r--   arch/x86/events/intel/ds.c              2
-rw-r--r--   arch/x86/events/intel/lbr.c            85
-rw-r--r--   arch/x86/events/perf_event.h           12
-rw-r--r--   arch/x86/events/perf_event_flags.h      1
-rw-r--r--   arch/x86/include/asm/msr-index.h        5
-rw-r--r--   arch/x86/include/asm/perf_event.h       4
7 files changed, 204 insertions, 8 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 584b58df7bf6..e068a96aeb54 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2792,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 
 static void intel_pmu_enable_event(struct perf_event *event)
 {
+	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -2800,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
 
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
+		if (branch_sample_counters(event))
+			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
-		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -3052,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+			intel_pmu_lbr_save_brstack(&data, cpuc, event);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -3617,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);
 
+	/* Not all counters support the branch counter feature. */
+	if (branch_sample_counters(event)) {
+		c2 = dyn_constraint(cpuc, c2, idx);
+		c2->idxmsk64 &= x86_pmu.lbr_counters;
+		c2->weight = hweight64(c2->idxmsk64);
+	}
+
 	return c2;
 }
 
@@ -3905,6 +3915,58 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (needs_branch_stack(event) && is_sampling_event(event))
 		event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
 
+	if (branch_sample_counters(event)) {
+		struct perf_event *leader, *sibling;
+		int num = 0;
+
+		if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+		    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+			return -EINVAL;
+
+		/*
+		 * The branch counter logging is not supported in the call stack
+		 * mode yet, since we cannot simply flush the LBR during e.g.,
+		 * multiplexing. Also, there is no obvious usage with the call
+		 * stack mode. Simply forbid it for now.
+		 *
+		 * If any events in the group enable the branch counter logging
+		 * feature, the group is treated as a branch counter logging
+		 * group, which requires the extra space to store the counters.
+		 */
+		leader = event->group_leader;
+		if (branch_sample_call_stack(leader))
+			return -EINVAL;
+		if (branch_sample_counters(leader))
+			num++;
+		leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+		for_each_sibling_event(sibling, leader) {
+			if (branch_sample_call_stack(sibling))
+				return -EINVAL;
+			if (branch_sample_counters(sibling))
+				num++;
+		}
+
+		if (num > fls(x86_pmu.lbr_counters))
+			return -EINVAL;
+		/*
+		 * Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't
+		 * require any branch stack setup.
+		 * Clear the bit to avoid unnecessary branch stack setup.
+		 */
+		if (0 == (event->attr.branch_sample_type &
+			  ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+			    PERF_SAMPLE_BRANCH_COUNTERS)))
+			event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+		/*
+		 * Force the leader to be an LBR event, so the LBRs can be
+		 * reset with the leader event. See intel_pmu_lbr_del() for details.
+		 */
+		if (!intel_pmu_needs_branch_stack(leader))
+			return -EINVAL;
+	}
+
 	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
@@ -4383,8 +4445,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	 */
 	if (event->attr.precise_ip == 3) {
 		/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
-		if (constraint_match(&fixed0_constraint, event->hw.config))
-			return &fixed0_counter0_1_constraint;
+		if (constraint_match(&fixed0_constraint, event->hw.config)) {
+			/* The fixed counter 0 doesn't support LBR event logging. */
+			if (branch_sample_counters(event))
+				return &counter0_1_constraint;
+			else
+				return &fixed0_counter0_1_constraint;
+		}
 
 		switch (c->idxmsk64 & 0x3ull) {
 		case 0x1:
@@ -4563,7 +4630,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 			goto err;
 	}
 
-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5535,15 +5602,39 @@ static ssize_t branches_show(struct device *cdev,
 
 static DEVICE_ATTR_RO(branches);
 
+static ssize_t branch_counter_nr_show(struct device *cdev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
 static struct attribute *lbr_attrs[] = {
 	&dev_attr_branches.attr,
+	&dev_attr_branch_counter_nr.attr,
+	&dev_attr_branch_counter_width.attr,
 	NULL
 };
 
 static umode_t
 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
-	return x86_pmu.lbr_nr ? attr->mode : 0;
+	/* branches */
+	if (i == 0)
+		return x86_pmu.lbr_nr ? attr->mode : 0;
+
+	return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
 }
 
 static char pmu_name_str[30];
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index cb3f329f8fa4..d49d661ec0a7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 
 	if (has_branch_stack(event)) {
 		intel_pmu_store_pebs_lbrs(lbr);
-		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+		intel_pmu_lbr_save_brstack(data, cpuc, event);
 	}
 }
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15a9841..78cd5084104e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 	perf_sched_cb_dec(event->pmu);
+
+	/*
+	 * The logged occurrences information is only valid for the
+	 * current LBR group. If another LBR group is scheduled in
+	 * later, the information from the stale LBRs will be wrongly
+	 * interpreted. Reset the LBRs here.
+	 *
+	 * Only clear once for a branch counter group with the leader
+	 * event, because:
+	 * - The LBRs cannot simply be reset on !cpuc->lbr_users, since
+	 *   it's possible that the last LBR user is not in a branch
+	 *   counter group, e.g., a branch_counters group + several
+	 *   normal LBR events.
+	 * - The LBR reset can be done with any one of the events in a
+	 *   branch counter group, since they are always scheduled together.
+	 *   It's easy to force the leader event to be an LBR event.
+	 */
+	if (is_branch_counters_group(event) && event == event->group_leader)
+		intel_pmu_lbr_reset();
 }
 
 static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
 	return cycles;
 }
 
+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 				struct lbr_entry *entries)
 {
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->abort	= !!(info & LBR_INFO_ABORT);
 		e->cycles	= get_lbr_cycles(info);
 		e->type		= get_lbr_br_type(info);
+
+		/*
+		 * Leverage the reserved field of cpuc->lbr_entries[i] to
+		 * temporarily store the branch counters information.
+		 * The later code will decide what content can be disclosed
+		 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
+		 */
+		e->reserved	= (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
 	}
 
 	cpuc->lbr_stack.nr = i;
 }
 
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+					   struct perf_event *event)
+{
+	int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+	struct perf_event *leader, *sibling;
+	u64 src, dst, cnt;
+
+	leader = event->group_leader;
+	if (branch_sample_counters(leader))
+		order[pos++] = leader->hw.idx;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!branch_sample_counters(sibling))
+			continue;
+		order[pos++] = sibling->hw.idx;
+	}
+
+	WARN_ON_ONCE(!pos);
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+		src = cpuc->lbr_entries[i].reserved;
+		dst = 0;
+		for (j = 0; j < pos; j++) {
+			cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+			dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+		}
+		cpuc->lbr_counters[i] = dst;
+		cpuc->lbr_entries[i].reserved = 0;
+	}
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (is_branch_counters_group(event)) {
+		intel_pmu_lbr_counters_reorder(cpuc, event);
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+		return;
+	}
+
+	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
 {
 	intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
 		if (!cpuc->lbr_entries[i].from) {
 			j = i;
-			while (++j < cpuc->lbr_stack.nr)
+			while (++j < cpuc->lbr_stack.nr) {
 				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+				cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+			}
 			cpuc->lbr_stack.nr--;
 			if (!cpuc->lbr_entries[i].from)
 				continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
 	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+	x86_pmu.lbr_counters = ecx.split.lbr_counters;
 	x86_pmu.lbr_nr = lbr_nr;
 
+	if (!!x86_pmu.lbr_counters)
+		x86_pmu.flags |= PMU_FL_BR_CNTR;
+
 	if (x86_pmu.lbr_mispred)
 		static_branch_enable(&x86_lbr_mispred);
 	if (x86_pmu.lbr_timed_lbr)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53dd5d495ba6..fb56518356ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
 	return is_metric_event(event) || is_slots_event(event);
 }
 
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+	return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
 	int				lbr_pebs_users;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	u64				lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
 	union {
 		struct er_account		*lbr_sel;
 		struct er_account		*lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
 	unsigned int	lbr_mispred:1;
 	unsigned int	lbr_timed_lbr:1;
 	unsigned int	lbr_br_type:1;
+	unsigned int	lbr_counters:4;
 
 	void		(*lbr_reset)(void);
 	void		(*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do {								\
 #define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event);
+
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
 				 struct perf_event_pmu_context *next_epc);
 
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index a1685981c520..6c977c19f2cd 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -21,3 +21,4 @@ PERF_ARCH(PEBS_STLAT,	0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
 PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS,	0x80000) /* logs the counters in the extra space of each branch */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f8b502867dd1..a5b0a19ccdf2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -236,6 +236,11 @@
 #define LBR_INFO_CYCLES			0xffff
 #define LBR_INFO_BR_TYPE_OFFSET		56
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET		32
+#define LBR_INFO_BR_CNTR_NUM		4
+#define LBR_INFO_BR_CNTR_BITS		2
+#define LBR_INFO_BR_CNTR_MASK		GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK	GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
 
 #define MSR_ARCH_LBR_CTL		0x000014ce
 #define ARCH_LBR_CTL_LBREN		BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2618ec7c3d1d..3736b8a46c04 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
 #define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR			(1ULL << 35)
 
 #define INTEL_FIXED_BITS_MASK				0xFULL
 #define INTEL_FIXED_BITS_STRIDE				4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
 		unsigned int	lbr_timed_lbr:1;
 		/* Branch Type Field Supported */
 		unsigned int	lbr_br_type:1;
+		unsigned int	reserved:13;
+		/* Branch counters (Event Logging) Supported */
+		unsigned int	lbr_counters:4;
 	} split;
 	unsigned int		full;
 };
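
For reference, the checks added to intel_pmu_hw_config() above translate into a fairly rigid userspace grouping: the group leader must be a sampling LBR event, every event that requests counter logging must be an architectural event, and the number of logged events is capped by fls(x86_pmu.lbr_counters). The sketch below is one possible way to open such a group and is not part of the patch: the hardware event choices are illustrative, and it assumes uapi headers new enough to define PERF_SAMPLE_BRANCH_COUNTERS plus a CPU that advertises the feature; otherwise perf_event_open() simply fails with EINVAL.

/*
 * Illustrative sketch only -- not derived from the patch itself.
 * The leader is a sampling LBR event (required by intel_pmu_hw_config()
 * above); the sibling only asks for its occurrences to be logged per
 * branch. Event choices and the uapi availability of
 * PERF_SAMPLE_BRANCH_COUNTERS are assumptions.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_open(struct perf_event_attr *attr, int group_fd)
{
	return syscall(__NR_perf_event_open, attr, 0, -1, group_fd, 0);
}

int main(void)
{
	struct perf_event_attr leader, sibling;
	int lfd, sfd;

	/* Leader: sampling event that records the branch stack via LBR. */
	memset(&leader, 0, sizeof(leader));
	leader.type = PERF_TYPE_HARDWARE;
	leader.size = sizeof(leader);
	leader.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
	leader.sample_period = 100000;
	leader.sample_type = PERF_SAMPLE_BRANCH_STACK;
	leader.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				    PERF_SAMPLE_BRANCH_COUNTERS;
	leader.disabled = 1;

	/* Sibling: its per-branch occurrences are logged in the LBR entries. */
	memset(&sibling, 0, sizeof(sibling));
	sibling.type = PERF_TYPE_HARDWARE;
	sibling.size = sizeof(sibling);
	sibling.config = PERF_COUNT_HW_INSTRUCTIONS;
	sibling.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;

	lfd = perf_open(&leader, -1);
	if (lfd < 0) {
		perror("perf_event_open(leader)");
		return 1;
	}
	sfd = perf_open(&sibling, lfd);
	if (sfd < 0) {
		perror("perf_event_open(sibling)");
		return 1;
	}

	/* Start the whole group; samples from the leader's ring buffer then
	 * carry one counter value per recorded branch (parsing not shown). */
	ioctl(lfd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	printf("branch counter group opened: leader=%d sibling=%d\n", lfd, sfd);
	return 0;
}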
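
The two read-only attributes added to lbr_attrs expose the number and width of the per-branch counters to userspace. A small sketch for querying them follows; the /sys/bus/event_source/devices/cpu/caps/ location is an assumption based on where the existing branches attribute is exported, and hybrid parts may publish the files under cpu_core rather than cpu.

/*
 * Illustrative sketch only. Path is assumed from the existing
 * caps/branches attribute; adjust "cpu" to "cpu_core" on hybrid systems.
 */
#include <stdio.h>

static void show_cap(const char *name)
{
	char path[256], buf[32];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/cpu/caps/%s", name);
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("%s: %s", name, buf);	/* sysfs value ends with '\n' */
	if (f)
		fclose(f);
}

int main(void)
{
	show_cap("branch_counter_nr");		/* fls(x86_pmu.lbr_counters) */
	show_cap("branch_counter_width");	/* LBR_INFO_BR_CNTR_BITS */
	return 0;
}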