From 6c1c07b33eb093e5a2a313ece89baa596ba6135e Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 21 Jan 2020 10:13:38 -0800
Subject: perf/x86/intel: Avoid unnecessary PEBS_ENABLE MSR access in PMI

The perf PMI handler, intel_pmu_handle_irq(), currently performs
unnecessary accesses to the PEBS_ENABLE MSR in
__intel_pmu_enable/disable_all() when PEBS is enabled.

When entering the handler, global ctrl is explicitly disabled, so no
counter counts anymore and it doesn't matter whether PEBS is enabled
or not inside a PMI handler. Furthermore, in most cases
cpuc->pebs_enabled is not changed in the PMI, so the PEBS status does
not change and the PEBS_ENABLE MSR doesn't need to be rewritten when
exiting the handler either.

PMI throttling, however, may change the PEBS status during the PMI
handler: x86_pmu_stop() ends up in intel_pmu_pebs_disable(), which can
update cpuc->pebs_enabled, but MSR_IA32_PEBS_ENABLE is not updated at
the same time because cpuc->enabled has been forced to 0. The patch
explicitly updates MSR_IA32_PEBS_ENABLE for this case.

Use ftrace to measure the duration of intel_pmu_handle_irq() on BDX:

  # perf record -e cycles:P -- ./tchain_edit

The average duration of intel_pmu_handle_irq():

  Without the patch: 1.144 us
  With the patch:    1.025 us

Signed-off-by: Kan Liang
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Link: https://lkml.kernel.org/r/20200121181338.3234-1-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dff6623804c2..332954cccece 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1945,6 +1945,14 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * intel_bts events don't coexist with intel PMU's BTS events because of
  * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
  * disabled around intel PMU's event batching etc, only inside the PMI handler.
+ *
+ * Avoid PEBS_ENABLE MSR access in PMIs.
+ * The GLOBAL_CTRL has been disabled. All the counters do not count anymore.
+ * It doesn't matter if the PEBS is enabled or not.
+ * Usually, the PEBS status are not changed in PMIs. It's unnecessary to
+ * access PEBS_ENABLE MSR in disable_all()/enable_all().
+ * However, there are some cases which may change PEBS status, e.g. PMI
+ * throttle. The PEBS_ENABLE should be updated where the status changes.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1954,13 +1962,12 @@ static void __intel_pmu_disable_all(void)
 
 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
-
-	intel_pmu_pebs_disable_all();
 }
 
 static void intel_pmu_disable_all(void)
 {
 	__intel_pmu_disable_all();
+	intel_pmu_pebs_disable_all();
 	intel_pmu_lbr_disable_all();
 }
 
@@ -1968,7 +1975,6 @@ static void __intel_pmu_enable_all(int added, bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	intel_pmu_pebs_enable_all();
 	intel_pmu_lbr_enable_all(pmi);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 	       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
@@ -1986,6 +1992,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
 
 static void intel_pmu_enable_all(int added)
 {
+	intel_pmu_pebs_enable_all();
 	__intel_pmu_enable_all(added, false);
 }
 
@@ -2374,9 +2381,21 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+		u64 pebs_enabled = cpuc->pebs_enabled;
+
 		handled++;
 		x86_pmu.drain_pebs(regs);
 		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+
+		/*
+		 * PMI throttle may be triggered, which stops the PEBS event.
+		 * Although cpuc->pebs_enabled is updated accordingly, the
+		 * MSR_IA32_PEBS_ENABLE is not updated. Because the
+		 * cpuc->enabled has been forced to 0 in PMI.
+		 * Update the MSR if pebs_enabled is changed.
+		 */
+		if (pebs_enabled != cpuc->pebs_enabled)
+			wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 	}
 
 	/*
-- cgit v1.2.3

From bbfd5e4fab63703375eafaf241a0c696024a59e1 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Mon, 27 Jan 2020 08:53:54 -0800
Subject: perf/core: Add new branch sample type for HW index of raw branch records

The low level index is the index, within the underlying hardware buffer, of the most recently captured taken branch, which is always saved in branch_entries[0]. It is very useful for reconstructing the call stack. For example, in Intel LBR call stack mode, the depth of the reconstructed LBR call stack is limited to the number of LBR registers. With the low level index information, the perf tool may stitch the stacks of two samples, so the reconstructed LBR call stack can break the HW limitation.

Add a new branch sample type to retrieve the low level index of raw branch records. The low level index is between -1 (unknown) and the maximum depth, which can be retrieved from /sys/devices/cpu/caps/branches. The low level index information is dumped into the PERF_SAMPLE_BRANCH_STACK output only when the new branch sample type is set.

The perf tool should check attr.branch_sample_type and apply the corresponding format to PERF_SAMPLE_BRANCH_STACK samples; otherwise some use cases may break. For example, users may parse a perf.data file that includes the new branch sample type with an old perf tool version (without the check) and silently get incorrect information.
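To make the format dependency concrete, here is a minimal user-space sketch of parsing the layout described above; the helper name and its arguments are illustrative, not part of the perf tool:

#include <stdint.h>
#include <linux/perf_event.h>

/*
 * Walk the PERF_SAMPLE_BRANCH_STACK payload of one sample record.
 * 'p' points at the u64 'nr' field; the return value points just past
 * the branch entries. (Illustrative sketch, not perf-tool code.)
 */
static const uint64_t *parse_branch_stack(const uint64_t *p,
					  uint64_t branch_sample_type,
					  uint64_t *hw_idx)
{
	uint64_t nr = *p++;

	/* hw_idx is present only if it was requested at event-open time. */
	if (branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)
		*hw_idx = *p++;
	else
		*hw_idx = -1ULL;	/* unknown */

	/* Each struct perf_branch_entry is { u64 from, to, flags }. */
	return p + nr * 3;
}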
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200127165355.27495-2-kan.liang@linux.intel.com --- arch/powerpc/perf/core-book3s.c | 1 + arch/x86/events/intel/lbr.c | 3 +++ include/linux/perf_event.h | 12 ++++++++++++ include/uapi/linux/perf_event.h | 8 +++++++- kernel/events/core.c | 10 ++++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3086055bf681..3dcfecf858f3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -518,6 +518,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * } } cpuhw->bhrb_stack.nr = u_index; + cpuhw->bhrb_stack.hw_idx = -1ULL; return; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 534c76606049..7639e2097101 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -585,6 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.hw_idx = -1ULL; } /* @@ -680,6 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) out++; } cpuc->lbr_stack.nr = out; + cpuc->lbr_stack.hw_idx = -1ULL; } void intel_pmu_lbr_read(void) @@ -1120,6 +1122,7 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr) int i; cpuc->lbr_stack.nr = x86_pmu.lbr_nr; + cpuc->lbr_stack.hw_idx = -1ULL; for (i = 0; i < x86_pmu.lbr_nr; i++) { u64 info = lbr->lbr[i].info; struct perf_branch_entry *e = &cpuc->lbr_entries[i]; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..68e21e828893 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -93,14 +93,26 @@ struct perf_raw_record { /* * branch stack layout: * nr: number of taken branches stored in entries[] + * hw_idx: The low level index of raw branch records + * for the most recent branch. + * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. + * The entries[] is an abstraction of raw branch records, + * which may not be stored in age order in HW, e.g. Intel LBR. + * The hw_idx is to expose the low level index of raw + * branch record for the most recent branch aka entries[0]. + * The hw_idx index is between -1 (unknown) and max depth, + * which can be retrieved in /sys/devices/cpu/caps/branches. + * For the architectures whose raw branch records are + * already stored in age order, the hw_idx should be 0. 
*/ struct perf_branch_stack { __u64 nr; + __u64 hw_idx; struct perf_branch_entry entries[0]; }; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 377d794d3105..397cfd65b3fe 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER diff --git a/kernel/events/core.c b/kernel/events/core.c index e453589da97c..3f1f77de7247 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6555,6 +6555,11 @@ static void perf_output_read(struct perf_output_handle *handle, perf_output_read_one(handle, event, enabled, running); } +static inline bool perf_sample_save_hw_index(struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, @@ -6643,6 +6648,8 @@ void perf_output_sample(struct perf_output_handle *handle, * sizeof(struct perf_branch_entry); perf_output_put(handle, data->br_stack->nr); + if (perf_sample_save_hw_index(event)) + perf_output_put(handle, data->br_stack->hw_idx); perf_output_copy(handle, data->br_stack->entries, size); } else { /* @@ -6836,6 +6843,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_BRANCH_STACK) { int size = sizeof(u64); /* nr */ if (data->br_stack) { + if (perf_sample_save_hw_index(event)) + size += sizeof(u64); + size += data->br_stack->nr * sizeof(struct perf_branch_entry); } -- cgit v1.2.3 From db278b90c326ce5895be09b6171f5ff3df1e3cca Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 27 Jan 2020 08:53:55 -0800 Subject: perf/x86/intel: Output LBR TOS information correctly For Intel LBR, the LBR Top-of-Stack (TOS) information is the HW index of raw branch record for the most recent branch. For non-adaptive PEBS and non-PEBS, the TOS information can be directly retrieved from TOS MSR read in intel_pmu_lbr_read(). For adaptive PEBS, the LBR information stored in PEBS record doesn't include the TOS information. For single PEBS, TOS can be directly read from MSR, because the PMI is triggered immediately after PEBS is written. TOS MSR is still unchanged. For large PEBS, TOS MSR has stale value. Set -1ULL to indicate that the TOS information is not available. 
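On the consumer side, the upshot of the -1ULL convention is that hw_idx must be validated before any stack stitching is attempted. A minimal sketch (hypothetical helper, not perf-tool code):

#include <stdbool.h>
#include <stdint.h>

/*
 * A sample's hw_idx is usable only if it is not -1ULL (unknown) and lies
 * below the depth advertised in /sys/devices/cpu/caps/branches.
 * (Hypothetical helper, not perf-tool code.)
 */
static bool hw_idx_usable(uint64_t hw_idx, uint64_t max_depth)
{
	return hw_idx != -1ULL && hw_idx < max_depth;
}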
Signed-off-by: Kan Liang
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Link: https://lkml.kernel.org/r/20200127165355.27495-3-kan.liang@linux.intel.com
---
 arch/x86/events/intel/lbr.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 7639e2097101..65113b16804a 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -585,7 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[i].reserved = 0;
 	}
 	cpuc->lbr_stack.nr = i;
-	cpuc->lbr_stack.hw_idx = -1ULL;
+	cpuc->lbr_stack.hw_idx = tos;
 }
 
 /*
@@ -681,7 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		out++;
 	}
 	cpuc->lbr_stack.nr = out;
-	cpuc->lbr_stack.hw_idx = -1ULL;
+	cpuc->lbr_stack.hw_idx = tos;
 }
 
 void intel_pmu_lbr_read(void)
@@ -1122,7 +1122,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
 	int i;
 
 	cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
-	cpuc->lbr_stack.hw_idx = -1ULL;
+
+	/* Cannot get TOS for large PEBS */
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
+		cpuc->lbr_stack.hw_idx = -1ULL;
+	else
+		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
+
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		u64 info = lbr->lbr[i].info;
 		struct perf_branch_entry *e = &cpuc->lbr_entries[i];
-- cgit v1.2.3

From fdb64822443ec9fb8c3a74b598a74790ae8d2e22 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Thu, 6 Feb 2020 08:15:27 -0800
Subject: perf/x86: Add Intel Tiger Lake uncore support

For MSR type uncore units, there is no difference between Ice Lake and Tiger Lake, so share the same code with Ice Lake.

Tiger Lake has two MCs, both located at PCI device 0:0:0. The BAR offset is still 0x48, and the two MCs are 0x10000 apart. Each MC has three free-running counters that count every read/write/total issued by the memory controller to DRAM; the counters can be accessed by MMIO. The counter offsets are different for TIGERLAKE_L and TIGERLAKE, so add separate mmio_init() functions.
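The description above boils down to a simple address computation per free-running counter. A minimal sketch (the helper is illustrative; only the 0x10000 stride and the per-model counter offsets come from the patch below):

/*
 * Illustrative only: MMIO location of one free-running IMC counter.
 * Each of the two MCs is a 0x10000 window behind the same MCHBAR.
 */
static u64 tgl_imc_counter_addr(u64 mchbar, int mc_idx, u64 counter_offset)
{
	return mchbar + 0x10000 * mc_idx + counter_offset;
}

For example, the TIGERLAKE_L read counter of the second MC would live at mchbar + 0x10000 + 0x5058, matching the tgl_l_uncore_imc_freerunning table below.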
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/20200206161527.3529-1-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 12 +++ arch/x86/events/intel/uncore.h | 2 + arch/x86/events/intel/uncore_snb.c | 159 +++++++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86467f85c383..63922e3a34f5 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1470,6 +1470,16 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .mmio_init = tgl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .mmio_init = tgl_l_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .cpu_init = snr_uncore_cpu_init, .pci_init = snr_uncore_pci_init, @@ -1505,6 +1515,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE_L, tgl_l_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE, tgl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index bbfdaa720b45..1204dcc9fe9b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -527,6 +527,8 @@ void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); +void tgl_uncore_mmio_init(void); +void tgl_l_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index c37cb12d0ef6..3de1065eefc4 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -44,6 +44,11 @@ #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 +#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02 +#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04 +#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12 +#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14 +#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36 /* SNB event control */ @@ -1002,3 +1007,157 @@ void nhm_uncore_cpu_init(void) } /* end of Nehalem uncore support */ + +/* Tiger Lake MMIO uncore support */ + +static const struct pci_device_id tgl_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ } +}; + +enum perf_tgl_uncore_imc_freerunning_types { + TGL_MMIO_UNCORE_IMC_DATA_TOTAL, + TGL_MMIO_UNCORE_IMC_DATA_READ, + TGL_MMIO_UNCORE_IMC_DATA_WRITE, + TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 }, +}; + +static struct freerunning_counters tgl_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 }, +}; + +static struct uncore_event_desc tgl_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"), + + { /* end: all zeroes */ } +}; + +static struct pci_dev *tgl_uncore_get_mc_dev(void) +{ + const struct pci_device_id *ids = tgl_uncore_pci_ids; + struct pci_dev *mc_dev = NULL; + + while (ids && ids->vendor) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL); + if (mc_dev) + return mc_dev; + ids++; + } + + return mc_dev; +} + +#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 + +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = tgl_uncore_get_mc_dev(); + struct intel_uncore_pmu *pmu = box->pmu; + resource_size_t addr; + u32 mch_bar; + + if (!pdev) { + pr_warn("perf uncore: Cannot find matched IMC device.\n"); + return; + } + + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); + /* MCHBAR is disabled */ + if (!(mch_bar & BIT(0))) { + pr_warn("perf uncore: MCHBAR is disabled. 
Failed to map IMC free-running counters.\n"); + return; + } + mch_bar &= ~BIT(0); + addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); + addr |= ((resource_size_t)mch_bar << 32); +#endif + + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); +} + +static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { + .init_box = tgl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct attribute *tgl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL +}; + +static const struct attribute_group tgl_uncore_imc_format_group = { + .name = "format", + .attrs = tgl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type tgl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .freerunning = tgl_uncore_imc_freerunning, + .ops = &tgl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *tgl_mmio_uncores[] = { + &tgl_uncore_imc_free_running, + NULL +}; + +void tgl_l_uncore_mmio_init(void) +{ + tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning; + uncore_mmio_uncores = tgl_mmio_uncores; +} + +void tgl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = tgl_mmio_uncores; +} + +/* end of Tiger Lake MMIO uncore support */ -- cgit v1.2.3 From c395c3553d6870541f1c283479aea2a6f26364d5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sun, 23 Feb 2020 11:34:49 -0800 Subject: perf diff: Fix undefined string comparison spotted by clang's -Wstring-compare clang warns: util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/map.c:434:15: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if (srcline != SRCLINE_UNKNOWN) ^ ~~~~~~~~~~~~~~~ Reviewer Notes: Looks good to me. Some more context: https://clang.llvm.org/docs/DiagnosticsReference.html#wstring-compare The spec says: J.1 Unspecified behavior The following are unspecified: .. Whether two string literals result in distinct arrays (6.4.5). 
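The issue is easy to reproduce standalone; SRCLINE_UNKNOWN below is a stand-in for the string literal perf defines:

#include <stdio.h>
#include <string.h>

#define SRCLINE_UNKNOWN "??:0"	/* stand-in for perf's definition */

int main(void)
{
	const char *srcline = "??:0";

	/*
	 * Unspecified: the compiler may or may not pool the two identical
	 * literals, so this branch can go either way between builds.
	 */
	if (srcline != SRCLINE_UNKNOWN)
		puts("pointer comparison: different");

	/* Well-defined: compare contents instead. */
	if (strcmp(srcline, SRCLINE_UNKNOWN) == 0)
		puts("string comparison: equal");

	return 0;
}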
Signed-off-by: Nick Desaulniers Reviewed-by: Ian Rogers Cc: Alexander Shishkin Cc: Changbin Du Cc: Jin Yao Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/900 Link: http://lore.kernel.org/lkml/20200223193456.25291-1-nick.desaulniers@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- tools/perf/util/block-info.c | 3 ++- tools/perf/util/map.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8b6ae557d8b..c03c36fde7e2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1312,7 +1312,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", start_line, end_line, block_he->diff.cycles); } else { diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index c4b030bf6ec2..fbbb6d640dad 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -295,7 +295,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s]", start_line, end_line); } else { diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index a08ca276098e..95428511300d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -431,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { char *srcline = map__srcline(map, addr, NULL); - if (srcline != SRCLINE_UNKNOWN) + if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); } -- cgit v1.2.3 From 7982a898515064ba180d958bfc89ed22d13905ee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 27 Jan 2020 19:00:31 +0900 Subject: tools lib api fs: Move cgroupsfs_find_mountpoint() Move it from tools/perf/util/cgroup.c as it can be used by other places. Note that cgroup filesystem is different from others since it's usually mounted separately (in v1) for each subsystem. I just copied the code with a little modification to pass a name of subsystem. 
Suggested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200127100031.1368732-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/Build | 1 + tools/lib/api/fs/cgroup.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/api/fs/fs.h | 2 ++ tools/perf/util/cgroup.c | 63 ++------------------------------------------ 4 files changed, 72 insertions(+), 61 deletions(-) create mode 100644 tools/lib/api/fs/cgroup.c diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build index f4ed9629ae85..0f75b28654de 100644 --- a/tools/lib/api/fs/Build +++ b/tools/lib/api/fs/Build @@ -1,2 +1,3 @@ libapi-y += fs.o libapi-y += tracing_path.o +libapi-y += cgroup.o diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c new file mode 100644 index 000000000000..889a6eb4aaca --- /dev/null +++ b/tools/lib/api/fs/cgroup.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include "fs.h" + +int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) +{ + FILE *fp; + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; + char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; + char *token, *saved_ptr = NULL; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return -1; + + /* + * in order to handle split hierarchy, we need to scan /proc/mounts + * and inspect every cgroupfs mount point to find one that has + * perf_event subsystem + */ + path_v1[0] = '\0'; + path_v2[0] = '\0'; + + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" + __stringify(PATH_MAX)"s %*d %*d\n", + mountpoint, type, tokens) == 3) { + + if (!path_v1[0] && !strcmp(type, "cgroup")) { + + token = strtok_r(tokens, ",", &saved_ptr); + + while (token != NULL) { + if (subsys && !strcmp(token, subsys)) { + strcpy(path_v1, mountpoint); + break; + } + token = strtok_r(NULL, ",", &saved_ptr); + } + } + + if (!path_v2[0] && !strcmp(type, "cgroup2")) + strcpy(path_v2, mountpoint); + + if (path_v1[0] && path_v2[0]) + break; + } + fclose(fp); + + if (path_v1[0]) + path = path_v1; + else if (path_v2[0]) + path = path_v2; + else + return -1; + + if (strlen(path) < maxlen) { + strcpy(buf, path); + return 0; + } + return -1; +} diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 92d03b8396b1..936edb95e1f3 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -28,6 +28,8 @@ FS(bpf_fs) #undef FS +int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys); + int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_xll(const char *filename, unsigned long long *value); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 4881d4af3381..5bc9d3b01bd9 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,75 +3,16 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" -#include #include #include #include #include #include #include +#include int nr_cgroups; -static int -cgroupfs_find_mountpoint(char *buf, size_t maxlen) -{ - FILE *fp; - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; - char *token, *saved_ptr = NULL; - - fp = fopen("/proc/mounts", "r"); - if (!fp) - return -1; - - /* - * in order to handle split hierarchy, we need to scan /proc/mounts - * and inspect every cgroupfs 
mount point to find one that has - * perf_event subsystem - */ - path_v1[0] = '\0'; - path_v2[0] = '\0'; - - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" - __stringify(PATH_MAX)"s %*d %*d\n", - mountpoint, type, tokens) == 3) { - - if (!path_v1[0] && !strcmp(type, "cgroup")) { - - token = strtok_r(tokens, ",", &saved_ptr); - - while (token != NULL) { - if (!strcmp(token, "perf_event")) { - strcpy(path_v1, mountpoint); - break; - } - token = strtok_r(NULL, ",", &saved_ptr); - } - } - - if (!path_v2[0] && !strcmp(type, "cgroup2")) - strcpy(path_v2, mountpoint); - - if (path_v1[0] && path_v2[0]) - break; - } - fclose(fp); - - if (path_v1[0]) - path = path_v1; - else if (path_v2[0]) - path = path_v2; - else - return -1; - - if (strlen(path) < maxlen) { - strcpy(buf, path); - return 0; - } - return -1; -} - static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -79,7 +20,7 @@ static int open_cgroup(const char *name) int fd; - if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) return -1; scnprintf(path, PATH_MAX, "%s/%s", mnt, name); -- cgit v1.2.3 From 1af62ce61cd80a25590d5abeccfb5c3a666527dd Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 14 Feb 2020 16:04:52 +0800 Subject: perf stat: Show percore counts in per CPU output We have supported the event modifier "percore" which sums up the event counts for all hardware threads in a core and show the counts per core. For example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1 Performance counter stats for 'system wide': S0-D0-C0 395,072 cpu/event=cpu-cycles,percore/ S0-D0-C1 851,248 cpu/event=cpu-cycles,percore/ S0-D0-C2 954,226 cpu/event=cpu-cycles,percore/ S0-D0-C3 1,233,659 cpu/event=cpu-cycles,percore/ This patch provides a new option "--percore-show-thread". It is used with event modifier "percore" together to sum up the event counts for all hardware threads in a core but show the counts per hardware thread. This is essentially a replacement for the any bit (which is gone in Icelake). Per core counts are useful for some formulas, e.g. CoreIPC. The original percore version was inconvenient to post process. This variant matches the output of the any bit. With this patch, for example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1 Performance counter stats for 'system wide': CPU0 2,453,061 cpu/event=cpu-cycles,percore/ CPU1 1,823,921 cpu/event=cpu-cycles,percore/ CPU2 1,383,166 cpu/event=cpu-cycles,percore/ CPU3 1,102,652 cpu/event=cpu-cycles,percore/ CPU4 2,453,061 cpu/event=cpu-cycles,percore/ CPU5 1,823,921 cpu/event=cpu-cycles,percore/ CPU6 1,383,166 cpu/event=cpu-cycles,percore/ CPU7 1,102,652 cpu/event=cpu-cycles,percore/ We can see counts are duplicated in CPU pairs (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7). The interval mode also works. 
For example, # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -I 1000 # time CPU counts unit events 1.000425421 CPU0 925,032 cpu/event=cpu-cycles,percore/ 1.000425421 CPU1 430,202 cpu/event=cpu-cycles,percore/ 1.000425421 CPU2 436,843 cpu/event=cpu-cycles,percore/ 1.000425421 CPU3 1,192,504 cpu/event=cpu-cycles,percore/ 1.000425421 CPU4 925,032 cpu/event=cpu-cycles,percore/ 1.000425421 CPU5 430,202 cpu/event=cpu-cycles,percore/ 1.000425421 CPU6 436,843 cpu/event=cpu-cycles,percore/ 1.000425421 CPU7 1,192,504 cpu/event=cpu-cycles,percore/ If we offline CPU5, the result is: # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1 Performance counter stats for 'system wide': CPU0 2,752,148 cpu/event=cpu-cycles,percore/ CPU1 1,009,312 cpu/event=cpu-cycles,percore/ CPU2 2,784,072 cpu/event=cpu-cycles,percore/ CPU3 2,427,922 cpu/event=cpu-cycles,percore/ CPU4 2,752,148 cpu/event=cpu-cycles,percore/ CPU6 2,784,072 cpu/event=cpu-cycles,percore/ CPU7 2,427,922 cpu/event=cpu-cycles,percore/ 1.001416041 seconds time elapsed v4: --- Ravi Bangoria reports an issue in v3. Once we offline a CPU, the output is not correct. The issue is we should use the cpu idx in print_percore_thread rather than using the cpu value. v3: --- 1. Fix the interval mode output error 2. Use cpu value (not cpu index) in config->aggr_get_id(). 3. Refine the code according to Jiri's comments. v2: --- Add the explanation in change log. This is essentially a replacement for the any bit. No code change. Signed-off-by: Jin Yao Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200214080452.26402-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 9 +++++++++ tools/perf/builtin-stat.c | 4 ++++ tools/perf/util/stat-display.c | 33 ++++++++++++++++++++++++++++----- tools/perf/util/stat.h | 1 + 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9431b8066fb4..4d56586b2fb9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -334,6 +334,15 @@ Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--percore-show-thread:: +The event modifier "percore" has supported to sum up the event counts +for all hardware threads in a core and show the counts per core. + +This option with event modifier "percore" enabled also sums up the event +counts for all hardware threads in a core but show the sum counts per +hardware thread. This is essentially a replacement for the any bit and +convenient for post processing. 
+ EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a098c2ebf4ea..ec053dc1e35c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -929,6 +929,10 @@ static struct option stat_options[] = { OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, "Configure all used events to run in user space.", PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread, + "Use with 'percore' event qualifier to show the event " + "counts of one hardware thread by sum up total hardware " + "threads of same physical core"), OPT_END() }; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc31fccc0057..d89cb0da90f8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config, config->csv_sep); break; case AGGR_NONE: - if (evsel->percore) { + if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), @@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first) + bool *first, int cpu) { struct aggr_data ad; FILE *output = config->output; @@ -654,7 +654,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, + printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); @@ -687,7 +687,7 @@ static void print_aggr(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) fputc('\n', output); @@ -1146,6 +1146,26 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. 
Try reorganizing the group.\n"); } +static void print_percore_thread(struct perf_stat_config *config, + struct evsel *counter, char *prefix) +{ + int s, s2, id; + bool first = true; + + for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + if (s2 == id) + break; + } + + print_counter_aggrdata(config, counter, s, + prefix, false, + &first, i); + } +} + static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1157,13 +1177,16 @@ static void print_percore(struct perf_stat_config *config, if (!(config->aggr_map || config->aggr_get_id)) return; + if (config->percore_show_thread) + return print_percore_thread(config, counter, prefix); + for (s = 0; s < config->aggr_map->nr; s++) { if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fb990efa54a8..b4fdfaa7f2c0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -109,6 +109,7 @@ struct perf_stat_config { bool walltime_run_table; bool all_kernel; bool all_user; + bool percore_show_thread; FILE *output; unsigned int interval; unsigned int timeout; -- cgit v1.2.3 From 357a5d24c4715d454f8acd6796e2adaaef0c48ed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Feb 2020 11:43:53 -0300 Subject: perf llvm: Add debug hint message about missing kernel-devel package To help in debugging, add this extra message: detect_kbuild_dir: Couldn't find "/lib/modules/5.4.20-200.fc31.x86_64/build/include/generated/autoconf.h", missing kernel-devel package?. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index b5af680fc667..dbdffb6673fe 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir) return -ENOMEM; return 0; } + pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", + __func__, autoconf_path); free(autoconf_path); return -ENOENT; } -- cgit v1.2.3 From dabce16bd2926b82ef1bef70acd8b24828e9448b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 4 Feb 2020 10:22:33 +0530 Subject: perf annotate: Get rid of annotation->nr_jumps The 'nr_jumps' field in 'struct annotation' is not used since it's inception in commit 2402e4a936a0 ("perf annotate browser: Show 'jumpy' functions"). Get rid of it. 
Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Ian Rogers Cc: Jin Yao Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200204045233.474937-7-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 -- tools/perf/util/annotate.h | 1 - 2 files changed, 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0ea95be84b3b..f1ea0d61eb5b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2611,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (++al->jump_sources > notes->max_jump_sources) notes->max_jump_sources = al->jump_sources; - - ++notes->nr_jumps; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 001258601a37..07c775938d46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -279,7 +279,6 @@ struct annotation { struct annotation_options *options; struct annotation_line **offsets; int nr_events; - int nr_jumps; int max_jump_sources; int nr_entries; int nr_asm_entries; -- cgit v1.2.3 From 76ce02651dabee19df018097db4485800e05e177 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Thu, 27 Feb 2020 20:44:24 +0100 Subject: libperf: Add counting example Current libperf man pages mention file counting.c "coming with libperf package", however, the file is missing. Add the file then. Fixes: 81de3bf37a8b ("libperf: Add man pages") Signed-off-by: Michael Petlan Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra LPU-Reference: 20200227194424.28210-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/examples/counting.c | 83 ++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tools/lib/perf/Documentation/examples/counting.c diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c new file mode 100644 index 000000000000..6085693571ef --- /dev/null +++ b/tools/lib/perf/Documentation/examples/counting.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + +int main(int argc, char **argv) +{ + int count = 100000, err = 0; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_thread_map *threads; + struct perf_counts_values counts; + + struct perf_event_attr attr1 = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + struct perf_event_attr attr2 = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + + libperf_init(libperf_print); + threads = perf_thread_map__new_dummy(); + if (!threads) { + fprintf(stderr, "failed to create threads\n"); + return -1; + } + perf_thread_map__set_pid(threads, 0, 0); + evlist = perf_evlist__new(); + if (!evlist) { + fprintf(stderr, "failed to create evlist\n"); + goto out_threads; + } + evsel = perf_evsel__new(&attr1); + if (!evsel) { + fprintf(stderr, "failed to create evsel1\n"); + goto out_evlist; + } + perf_evlist__add(evlist, evsel); + evsel = perf_evsel__new(&attr2); + if (!evsel) { + fprintf(stderr, "failed to create evsel2\n"); + goto out_evlist; + } 
+ perf_evlist__add(evlist, evsel); + perf_evlist__set_maps(evlist, NULL, threads); + err = perf_evlist__open(evlist); + if (err) { + fprintf(stderr, "failed to open evsel\n"); + goto out_evlist; + } + perf_evlist__enable(evlist); + while (count--); + perf_evlist__disable(evlist); + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts); + fprintf(stdout, "count %llu, enabled %llu, run %llu\n", + counts.val, counts.ena, counts.run); + } + perf_evlist__close(evlist); +out_evlist: + perf_evlist__delete(evlist); +out_threads: + perf_thread_map__put(threads); + return err; +} -- cgit v1.2.3 From 401d61cbd4d4c6b44b2a895ab4073c7f214c096b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 3 Mar 2020 23:18:52 -0500 Subject: tools lib traceevent: Remove extra '\n' in print_event_time() If the precision of print_event_time() is zero or greater than the timestamp, it uses a different format. But that format had an extra new line at the end, and caused the output to not look right: cpus=2 sleep-3946 [001]111264306005 : function: inotify_inode_queue_event sleep-3946 [001]111264307158 : function: __fsnotify_parent sleep-3946 [001]111264307637 : function: inotify_dentry_parent_queue_event sleep-3946 [001]111264307989 : function: fsnotify sleep-3946 [001]111264308401 : function: audit_syscall_exit Fixes: 38847db9740a ("libtraceevent, perf tools: Changes in tep_print_event_* APIs") Signed-off-by: Steven Rostedt (VMware) Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200303231852.6ab6882f@oasis.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index beaa8b8c08ff..e1bd2a93c6db 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5541,7 +5541,7 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s, if (p10 > 1 && p10 < time) trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); else - trace_seq_printf(s, "%12llu\n", time); + trace_seq_printf(s, "%12llu", time); } struct print_event_type { -- cgit v1.2.3 From 6339998d22ecae5d6435dd87b4904ff6e16bfe56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Mar 2020 10:50:58 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: bbfd5e4fab63 ("perf/core: Add new branch sample type for HW index of raw branch records") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h This update is a prerequisite to adding support for the HW index of raw branch records. 
Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra (Intel) Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200304134902.GB12612@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 377d794d3105..397cfd65b3fe 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3 From ab6f824cfdf7363b5e529621cbc72ae6519c78d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Aug 2019 11:17:00 +0200 Subject: perf/core: Unify {pinned,flexible}_sched_in() Less is more; unify the two very nearly identical function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- kernel/events/core.c | 58 +++++++++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3f1f77de7247..b713080eedcc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1986,6 +1986,12 @@ static int perf_get_aux_event(struct perf_event *event, return 1; } +static inline struct list_head *get_event_list(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active; +} + static void perf_group_detach(struct perf_event *event) { struct perf_event *sibling, *tmp; @@ -2028,12 +2034,8 @@ static void perf_group_detach(struct perf_event *event) if (!RB_EMPTY_NODE(&event->group_node)) { add_event_to_groups(sibling, event->ctx); - if (sibling->state == PERF_EVENT_STATE_ACTIVE) { - struct list_head *list = sibling->attr.pinned ? 
- &ctx->pinned_active : &ctx->flexible_active; - - list_add_tail(&sibling->active_list, list); - } + if (sibling->state == PERF_EVENT_STATE_ACTIVE) + list_add_tail(&sibling->active_list, get_event_list(sibling)); } WARN_ON_ONCE(sibling->ctx != event->ctx); @@ -2350,6 +2352,8 @@ event_sched_in(struct perf_event *event, { int ret = 0; + WARN_ON_ONCE(event->ctx != ctx); + lockdep_assert_held(&ctx->lock); if (event->state <= PERF_EVENT_STATE_OFF) @@ -3425,10 +3429,12 @@ struct sched_in_data { int can_add_hw; }; -static int pinned_sched_in(struct perf_event *event, void *data) +static int merge_sched_in(struct perf_event *event, void *data) { struct sched_in_data *sid = data; + WARN_ON_ONCE(event->ctx != sid->ctx); + if (event->state <= PERF_EVENT_STATE_OFF) return 0; @@ -3437,37 +3443,15 @@ static int pinned_sched_in(struct perf_event *event, void *data) if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { if (!group_sched_in(event, sid->cpuctx, sid->ctx)) - list_add_tail(&event->active_list, &sid->ctx->pinned_active); + list_add_tail(&event->active_list, get_event_list(event)); } - /* - * If this pinned group hasn't been scheduled, - * put it in error state. - */ - if (event->state == PERF_EVENT_STATE_INACTIVE) - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); - - return 0; -} - -static int flexible_sched_in(struct perf_event *event, void *data) -{ - struct sched_in_data *sid = data; - - if (event->state <= PERF_EVENT_STATE_OFF) - return 0; - - if (!event_filter_match(event)) - return 0; + if (event->state == PERF_EVENT_STATE_INACTIVE) { + if (event->attr.pinned) + perf_event_set_state(event, PERF_EVENT_STATE_ERROR); - if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { - int ret = group_sched_in(event, sid->cpuctx, sid->ctx); - if (ret) { - sid->can_add_hw = 0; - sid->ctx->rotate_necessary = 1; - return 0; - } - list_add_tail(&event->active_list, &sid->ctx->flexible_active); + sid->can_add_hw = 0; + sid->ctx->rotate_necessary = 1; } return 0; @@ -3485,7 +3469,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, visit_groups_merge(&ctx->pinned_groups, smp_processor_id(), - pinned_sched_in, &sid); + merge_sched_in, &sid); } static void @@ -3500,7 +3484,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, visit_groups_merge(&ctx->flexible_groups, smp_processor_id(), - flexible_sched_in, &sid); + merge_sched_in, &sid); } static void -- cgit v1.2.3 From 2c2366c7548ecee65adfd264517ddf50f9e2d029 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Aug 2019 11:45:01 +0200 Subject: perf/core: Remove 'struct sched_in_data' We can deduce the ctx and cpuctx from the event, no need to pass them along. Remove the structure and pass in can_add_hw directly. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- kernel/events/core.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b713080eedcc..b7eaabaee76f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3423,17 +3423,11 @@ static int visit_groups_merge(struct perf_event_groups *groups, int cpu, return 0; } -struct sched_in_data { - struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx; - int can_add_hw; -}; - static int merge_sched_in(struct perf_event *event, void *data) { - struct sched_in_data *sid = data; - - WARN_ON_ONCE(event->ctx != sid->ctx); + struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + int *can_add_hw = data; if (event->state <= PERF_EVENT_STATE_OFF) return 0; @@ -3441,8 +3435,8 @@ static int merge_sched_in(struct perf_event *event, void *data) if (!event_filter_match(event)) return 0; - if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { - if (!group_sched_in(event, sid->cpuctx, sid->ctx)) + if (group_can_go_on(event, cpuctx, *can_add_hw)) { + if (!group_sched_in(event, cpuctx, ctx)) list_add_tail(&event->active_list, get_event_list(event)); } @@ -3450,8 +3444,8 @@ static int merge_sched_in(struct perf_event *event, void *data) if (event->attr.pinned) perf_event_set_state(event, PERF_EVENT_STATE_ERROR); - sid->can_add_hw = 0; - sid->ctx->rotate_necessary = 1; + *can_add_hw = 0; + ctx->rotate_necessary = 1; } return 0; @@ -3461,30 +3455,22 @@ static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx) { - struct sched_in_data sid = { - .ctx = ctx, - .cpuctx = cpuctx, - .can_add_hw = 1, - }; + int can_add_hw = 1; visit_groups_merge(&ctx->pinned_groups, smp_processor_id(), - merge_sched_in, &sid); + merge_sched_in, &can_add_hw); } static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx) { - struct sched_in_data sid = { - .ctx = ctx, - .cpuctx = cpuctx, - .can_add_hw = 1, - }; + int can_add_hw = 1; visit_groups_merge(&ctx->flexible_groups, smp_processor_id(), - merge_sched_in, &sid); + merge_sched_in, &can_add_hw); } static void -- cgit v1.2.3 From 98add2af89bbfe8241e189b490fd91e5751c7900 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Feb 2020 23:51:28 -0800 Subject: perf/cgroup: Reorder perf_cgroup_connect() Move perf_cgroup_connect() after perf_event_alloc(), such that we can find/use the PMU's cpu context. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-2-irogers@google.com --- kernel/events/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b7eaabaee76f..dceeeb1d012a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10774,12 +10774,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!has_branch_stack(event)) event->attr.branch_sample_type = 0; - if (cgroup_fd != -1) { - err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); - if (err) - goto err_ns; - } - pmu = perf_init_event(event); if (IS_ERR(pmu)) { err = PTR_ERR(pmu); @@ -10801,6 +10795,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_pmu; } + if (cgroup_fd != -1) { + err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); + if (err) + goto err_pmu; + } + err = exclusive_event_init(event); if (err) goto err_pmu; @@ -10861,12 +10861,12 @@ err_per_task: exclusive_event_destroy(event); err_pmu: + if (is_cgroup_event(event)) + perf_detach_cgroup(event); if (event->destroy) event->destroy(event); module_put(pmu->module); err_ns: - if (is_cgroup_event(event)) - perf_detach_cgroup(event); if (event->ns) put_pid_ns(event->ns); if (event->hw.target) -- cgit v1.2.3 From 6e24628d78e4785385876125cba62315ca3b04b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:29 -0800 Subject: lib: Introduce generic min-heap Supports push, pop and converting an array into a heap. If the sense of the compare function is inverted then it can provide a max-heap. Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-3-irogers@google.com --- include/linux/min_heap.h | 134 ++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 10 +++ lib/Makefile | 1 + lib/test_min_heap.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 339 insertions(+) create mode 100644 include/linux/min_heap.h create mode 100644 lib/test_min_heap.c diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h new file mode 100644 index 000000000000..44077837385f --- /dev/null +++ b/include/linux/min_heap.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MIN_HEAP_H +#define _LINUX_MIN_HEAP_H + +#include +#include +#include + +/** + * struct min_heap - Data structure to hold a min-heap. + * @data: Start of array holding the heap elements. + * @nr: Number of elements currently in the heap. + * @size: Maximum number of elements that can be held in current storage. + */ +struct min_heap { + void *data; + int nr; + int size; +}; + +/** + * struct min_heap_callbacks - Data/functions to customise the min_heap. + * @elem_size: The nr of each element in bytes. + * @less: Partial order function for this heap. + * @swp: Swap elements function. + */ +struct min_heap_callbacks { + int elem_size; + bool (*less)(const void *lhs, const void *rhs); + void (*swp)(void *lhs, void *rhs); +}; + +/* Sift the element at pos down the heap. 
*/ +static __always_inline +void min_heapify(struct min_heap *heap, int pos, + const struct min_heap_callbacks *func) +{ + void *left, *right, *parent, *smallest; + void *data = heap->data; + + for (;;) { + if (pos * 2 + 1 >= heap->nr) + break; + + left = data + ((pos * 2 + 1) * func->elem_size); + parent = data + (pos * func->elem_size); + smallest = parent; + if (func->less(left, smallest)) + smallest = left; + + if (pos * 2 + 2 < heap->nr) { + right = data + ((pos * 2 + 2) * func->elem_size); + if (func->less(right, smallest)) + smallest = right; + } + if (smallest == parent) + break; + func->swp(smallest, parent); + if (smallest == left) + pos = (pos * 2) + 1; + else + pos = (pos * 2) + 2; + } +} + +/* Floyd's approach to heapification that is O(nr). */ +static __always_inline +void min_heapify_all(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + int i; + + for (i = heap->nr / 2; i >= 0; i--) + min_heapify(heap, i, func); +} + +/* Remove minimum element from the heap, O(log2(nr)). */ +static __always_inline +void min_heap_pop(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); + min_heapify(heap, 0, func); +} + +/* + * Remove the minimum element and then push the given element. The + * implementation performs 1 sift (O(log2(nr))) and is therefore more + * efficient than a pop followed by a push that does 2. + */ +static __always_inline +void min_heap_pop_push(struct min_heap *heap, + const void *element, + const struct min_heap_callbacks *func) +{ + memcpy(heap->data, element, func->elem_size); + min_heapify(heap, 0, func); +} + +/* Push an element on to the heap, O(log2(nr)). */ +static __always_inline +void min_heap_push(struct min_heap *heap, const void *element, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + void *child, *parent; + int pos; + + if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) + return; + + /* Place at the end of data. */ + pos = heap->nr; + memcpy(data + (pos * func->elem_size), element, func->elem_size); + heap->nr++; + + /* Sift child at pos up. */ + for (; pos > 0; pos = (pos - 1) / 2) { + child = data + (pos * func->elem_size); + parent = data + ((pos - 1) / 2) * func->elem_size; + if (func->less(parent, child)) + break; + func->swp(parent, child); + } +} + +#endif /* _LINUX_MIN_HEAP_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 69def4a9df00..f04b61c1a1cc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1769,6 +1769,16 @@ config TEST_LIST_SORT If unsure, say N. +config TEST_MIN_HEAP + tristate "Min heap test" + depends on DEBUG_KERNEL || m + help + Enable this to turn on min heap function tests. This test is + executed only once during system boot (so affects only boot time), + or at module load time. + + If unsure, say N. 
+ config TEST_SORT tristate "Array-based sort test" depends on DEBUG_KERNEL || m diff --git a/lib/Makefile b/lib/Makefile index 611872c06926..09a8acb0cf92 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -67,6 +67,7 @@ CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla) UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o +obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c new file mode 100644 index 000000000000..d19c8080fd4d --- /dev/null +++ b/lib/test_min_heap.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "min_heap_test: " fmt + +/* + * Test cases for the min max heap. + */ + +#include <linux/log2.h> +#include <linux/min_heap.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/random.h> + +static __init bool less_than(const void *lhs, const void *rhs) +{ + return *(int *)lhs < *(int *)rhs; +} + +static __init bool greater_than(const void *lhs, const void *rhs) +{ + return *(int *)lhs > *(int *)rhs; +} + +static __init void swap_ints(void *lhs, void *rhs) +{ + int temp = *(int *)lhs; + + *(int *)lhs = *(int *)rhs; + *(int *)rhs = temp; +} + +static __init int pop_verify_heap(bool min_heap, + struct min_heap *heap, + const struct min_heap_callbacks *funcs) +{ + int *values = heap->data; + int err = 0; + int last; + + last = values[0]; + min_heap_pop(heap, funcs); + while (heap->nr > 0) { + if (min_heap) { + if (last > values[0]) { + pr_err("error: expected %d <= %d\n", last, + values[0]); + err++; + } + } else { + if (last < values[0]) { + pr_err("error: expected %d >= %d\n", last, + values[0]); + err++; + } + } + last = values[0]; + min_heap_pop(heap, funcs); + } + return err; +} + +static __init int test_heapify_all(bool min_heap) +{ + int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, + -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; + struct min_heap heap = { + .data = values, + .nr = ARRAY_SIZE(values), + .size = ARRAY_SIZE(values), + }; + struct min_heap_callbacks funcs = { + .elem_size = sizeof(int), + .less = min_heap ? less_than : greater_than, + .swp = swap_ints, + }; + int i, err; + + /* Test with known set of values. */ + min_heapify_all(&heap, &funcs); + err = pop_verify_heap(min_heap, &heap, &funcs); + + + /* Test with randomly generated values. */ + heap.nr = ARRAY_SIZE(values); + for (i = 0; i < heap.nr; i++) + values[i] = get_random_int(); + + min_heapify_all(&heap, &funcs); + err += pop_verify_heap(min_heap, &heap, &funcs); + + return err; +} + +static __init int test_heap_push(bool min_heap) +{ + const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, + -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; + int values[ARRAY_SIZE(data)]; + struct min_heap heap = { + .data = values, + .nr = 0, + .size = ARRAY_SIZE(values), + }; + struct min_heap_callbacks funcs = { + .elem_size = sizeof(int), + .less = min_heap ? less_than : greater_than, + .swp = swap_ints, + }; + int i, temp, err; + + /* Test with known set of values copied from data. */ + for (i = 0; i < ARRAY_SIZE(data); i++) + min_heap_push(&heap, &data[i], &funcs); + + err = pop_verify_heap(min_heap, &heap, &funcs); + + /* Test with randomly generated values.
*/ + while (heap.nr < heap.size) { + temp = get_random_int(); + min_heap_push(&heap, &temp, &funcs); + } + err += pop_verify_heap(min_heap, &heap, &funcs); + + return err; +} + +static __init int test_heap_pop_push(bool min_heap) +{ + const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, + -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; + int values[ARRAY_SIZE(data)]; + struct min_heap heap = { + .data = values, + .nr = 0, + .size = ARRAY_SIZE(values), + }; + struct min_heap_callbacks funcs = { + .elem_size = sizeof(int), + .less = min_heap ? less_than : greater_than, + .swp = swap_ints, + }; + int i, temp, err; + + /* Fill values with data to pop and replace. */ + temp = min_heap ? 0x80000000 : 0x7FFFFFFF; + for (i = 0; i < ARRAY_SIZE(data); i++) + min_heap_push(&heap, &temp, &funcs); + + /* Test with known set of values copied from data. */ + for (i = 0; i < ARRAY_SIZE(data); i++) + min_heap_pop_push(&heap, &data[i], &funcs); + + err = pop_verify_heap(min_heap, &heap, &funcs); + + heap.nr = 0; + for (i = 0; i < ARRAY_SIZE(data); i++) + min_heap_push(&heap, &temp, &funcs); + + /* Test with randomly generated values. */ + for (i = 0; i < ARRAY_SIZE(data); i++) { + temp = get_random_int(); + min_heap_pop_push(&heap, &temp, &funcs); + } + err += pop_verify_heap(min_heap, &heap, &funcs); + + return err; +} + +static int __init test_min_heap_init(void) +{ + int err = 0; + + err += test_heapify_all(true); + err += test_heapify_all(false); + err += test_heap_push(true); + err += test_heap_push(false); + err += test_heap_pop_push(true); + err += test_heap_pop_push(false); + if (err) { + pr_err("test failed with %d errors\n", err); + return -EINVAL; + } + pr_info("test passed\n"); + return 0; +} +module_init(test_min_heap_init); + +static void __exit test_min_heap_exit(void) +{ + /* do nothing */ +} +module_exit(test_min_heap_exit); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 6eef8a7116deae0706ba6d897c0d7dd887cd2be2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:30 -0800 Subject: perf/core: Use min_heap in visit_groups_merge() visit_groups_merge() will pick the next event based on when it was inserted into the context (perf_event group_index). Events may be per CPU or for any CPU, but in the future we'd also like to have per cgroup events to avoid searching all events for the ones to schedule for a cgroup. Introduce a min heap for the events that maintains the property that the earliest inserted event is always at the 0th element. Initialize the heap with per-CPU and any-CPU events for the context.
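The scheme is easiest to see in a tiny stand-alone form: one iterator per event source, heapified so the smallest group_index sits at slot 0, then visited, advanced, and re-sifted. The sketch below is hypothetical user-space C, with a two-slot heap specialised by hand; the real code in the diff below uses the generic min_heap API with perf_less_group_idx():

	#include <stdio.h>

	struct iter { const int *pos, *end; };

	static void sift_down(struct iter *h, int nr)
	{
		/* With at most two iterators, one compare/swap restores heap order. */
		if (nr == 2 && *h[1].pos < *h[0].pos) {
			struct iter t = h[0];
			h[0] = h[1];
			h[1] = t;
		}
	}

	int main(void)
	{
		int a[] = { 1, 4, 6 }, b[] = { 2, 3, 5 };	/* group_index streams */
		struct iter heap[2] = { { a, a + 3 }, { b, b + 3 } };
		int nr = 2;

		sift_down(heap, nr);				/* min_heapify_all() */
		while (nr) {
			printf("%d\n", *heap[0].pos);		/* the func(*evt, data) step */
			if (++heap[0].pos == heap[0].end)	/* perf_event_groups_next() */
				heap[0] = heap[--nr];		/* min_heap_pop() */
			sift_down(heap, nr);			/* min_heapify() */
		}
		return 0;					/* prints 1..6 in order */
	}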
Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-4-irogers@google.com --- kernel/events/core.c | 67 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index dceeeb1d012a..ddfb06c9f367 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "internal.h" @@ -3392,32 +3393,66 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); } -static int visit_groups_merge(struct perf_event_groups *groups, int cpu, - int (*func)(struct perf_event *, void *), void *data) +static bool perf_less_group_idx(const void *l, const void *r) { - struct perf_event **evt, *evt1, *evt2; + const struct perf_event *le = l, *re = r; + + return le->group_index < re->group_index; +} + +static void swap_ptr(void *l, void *r) +{ + void **lp = l, **rp = r; + + swap(*lp, *rp); +} + +static const struct min_heap_callbacks perf_min_heap = { + .elem_size = sizeof(struct perf_event *), + .less = perf_less_group_idx, + .swp = swap_ptr, +}; + +static void __heap_add(struct min_heap *heap, struct perf_event *event) +{ + struct perf_event **itrs = heap->data; + + if (event) { + itrs[heap->nr] = event; + heap->nr++; + } +} + +static noinline int visit_groups_merge(struct perf_event_groups *groups, + int cpu, + int (*func)(struct perf_event *, void *), + void *data) +{ + /* Space for per CPU and/or any CPU event iterators. */ + struct perf_event *itrs[2]; + struct min_heap event_heap = { + .data = itrs, + .nr = 0, + .size = ARRAY_SIZE(itrs), + }; + struct perf_event **evt = event_heap.data; int ret; - evt1 = perf_event_groups_first(groups, -1); - evt2 = perf_event_groups_first(groups, cpu); + __heap_add(&event_heap, perf_event_groups_first(groups, -1)); + __heap_add(&event_heap, perf_event_groups_first(groups, cpu)); - while (evt1 || evt2) { - if (evt1 && evt2) { - if (evt1->group_index < evt2->group_index) - evt = &evt1; - else - evt = &evt2; - } else if (evt1) { - evt = &evt1; - } else { - evt = &evt2; - } + min_heapify_all(&event_heap, &perf_min_heap); + while (event_heap.nr) { ret = func(*evt, data); if (ret) return ret; *evt = perf_event_groups_next(*evt); + if (*evt) + min_heapify(&event_heap, 0, &perf_min_heap); + else + min_heap_pop(&event_heap, &perf_min_heap); } return 0; -- cgit v1.2.3 From 836196beb377e59e54ec9e04f7402076ef7a8bd8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:31 -0800 Subject: perf/core: Add per perf_cpu_context min_heap storage The storage required for visit_groups_merge's min heap needs to vary in order to support more iterators, such as when multiple nested cgroups' events are being visited. This change allows for 2 iterators and doesn't support growth. 
Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-5-irogers@google.com --- include/linux/perf_event.h | 7 +++++++ kernel/events/core.c | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 68e21e828893..8768a39b5258 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -862,6 +862,13 @@ struct perf_cpu_context { int sched_cb_usage; int online; + /* + * Per-CPU storage for iterators used in visit_groups_merge. The default + * storage is of size 2 to hold the CPU and any CPU event iterators. + */ + int heap_size; + struct perf_event **heap; + struct perf_event *heap_default[2]; }; struct perf_output_handle { diff --git a/kernel/events/core.c b/kernel/events/core.c index ddfb06c9f367..7529e76a2e36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3423,22 +3423,34 @@ static void __heap_add(struct min_heap *heap, struct perf_event *event) } } -static noinline int visit_groups_merge(struct perf_event_groups *groups, - int cpu, +static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, + struct perf_event_groups *groups, int cpu, int (*func)(struct perf_event *, void *), void *data) { /* Space for per CPU and/or any CPU event iterators. */ struct perf_event *itrs[2]; - struct min_heap event_heap = { - .data = itrs, - .nr = 0, - .size = ARRAY_SIZE(itrs), - }; - struct perf_event **evt = event_heap.data; + struct min_heap event_heap; + struct perf_event **evt; int ret; - __heap_add(&event_heap, perf_event_groups_first(groups, -1)); + if (cpuctx) { + event_heap = (struct min_heap){ + .data = cpuctx->heap, + .nr = 0, + .size = cpuctx->heap_size, + }; + } else { + event_heap = (struct min_heap){ + .data = itrs, + .nr = 0, + .size = ARRAY_SIZE(itrs), + }; + /* Events not within a CPU context may be on any CPU. */ + __heap_add(&event_heap, perf_event_groups_first(groups, -1)); + } + evt = event_heap.data; + __heap_add(&event_heap, perf_event_groups_first(groups, cpu)); min_heapify_all(&event_heap, &perf_min_heap); @@ -3492,7 +3504,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, { int can_add_hw = 1; - visit_groups_merge(&ctx->pinned_groups, + if (ctx != &cpuctx->ctx) + cpuctx = NULL; + + visit_groups_merge(cpuctx, &ctx->pinned_groups, smp_processor_id(), merge_sched_in, &can_add_hw); } @@ -3503,7 +3518,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, { int can_add_hw = 1; - visit_groups_merge(&ctx->flexible_groups, + if (ctx != &cpuctx->ctx) + cpuctx = NULL; + + visit_groups_merge(cpuctx, &ctx->flexible_groups, smp_processor_id(), merge_sched_in, &can_add_hw); } @@ -10364,6 +10382,9 @@ skip_type: cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask); __perf_mux_hrtimer_init(cpuctx, cpu); + + cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default); + cpuctx->heap = cpuctx->heap_default; } got_cpu_context: -- cgit v1.2.3 From c2283c9368d41063f2077cb58def02217360526d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:32 -0800 Subject: perf/cgroup: Grow per perf_cpu_context heap storage Allow the per-CPU min heap storage to have sufficient space for per-cgroup iterators. 
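The allocation strategy used for this growth (in the diff below) follows a standard kernel idiom worth spelling out: allocate the candidate buffer with no lock held, install it under the lock only after rechecking that it is still needed, and free whichever pointer lost the race. A hedged user-space sketch of just that idiom, with hypothetical demo_* names:

	#include <stdlib.h>
	#include <pthread.h>

	static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
	static int *demo_buf;
	static int demo_size;

	static int demo_ensure_size(int want)
	{
		/* Allocate outside the lock; a sleeping allocator is fine here. */
		int *storage = malloc(sizeof(*storage) * want);

		if (!storage)
			return -1;

		pthread_mutex_lock(&demo_lock);
		if (demo_size < want) {		/* recheck under the lock */
			int *old = demo_buf;

			demo_buf = storage;	/* install ours */
			storage = old;		/* hand back the loser */
			demo_size = want;
		}
		pthread_mutex_unlock(&demo_lock);

		free(storage);			/* old buffer, or ours if we raced */
		return 0;
	}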
Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-6-irogers@google.com --- kernel/events/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7529e76a2e36..8065949a865e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -892,6 +892,47 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev, rcu_read_unlock(); } +static int perf_cgroup_ensure_storage(struct perf_event *event, + struct cgroup_subsys_state *css) +{ + struct perf_cpu_context *cpuctx; + struct perf_event **storage; + int cpu, heap_size, ret = 0; + + /* + * Allow storage to have sufficient space for an iterator for each + * possibly nested cgroup plus an iterator for events with no cgroup. + */ + for (heap_size = 1; css; css = css->parent) + heap_size++; + + for_each_possible_cpu(cpu) { + cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu); + if (heap_size <= cpuctx->heap_size) + continue; + + storage = kmalloc_node(heap_size * sizeof(struct perf_event *), + GFP_KERNEL, cpu_to_node(cpu)); + if (!storage) { + ret = -ENOMEM; + break; + } + + raw_spin_lock_irq(&cpuctx->ctx.lock); + if (cpuctx->heap_size < heap_size) { + swap(cpuctx->heap, storage); + if (storage == cpuctx->heap_default) + storage = NULL; + cpuctx->heap_size = heap_size; + } + raw_spin_unlock_irq(&cpuctx->ctx.lock); + + kfree(storage); + } + + return ret; +} + static inline int perf_cgroup_connect(int fd, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) @@ -911,6 +952,10 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, goto out; } + ret = perf_cgroup_ensure_storage(event, css); + if (ret) + goto out; + cgrp = container_of(css, struct perf_cgroup, css); event->cgrp = cgrp; @@ -3440,6 +3485,8 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, .nr = 0, .size = cpuctx->heap_size, }; + + lockdep_assert_held(&cpuctx->ctx.lock); } else { event_heap = (struct min_heap){ .data = itrs, -- cgit v1.2.3 From 95ed6c707f26a727a29972b60469630ae10d579c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:33 -0800 Subject: perf/cgroup: Order events in RB tree by cgroup id If one is monitoring 6 events on 20 cgroups the per-CPU RB tree will hold 120 events. The scheduling in of the events currently iterates over all events looking to see which events match the task's cgroup or its cgroup hierarchy. If a task is in 1 cgroup with 6 events, then 114 events are considered unnecessarily. This change orders events in the RB tree by cgroup id if it is present. This means scheduling in may go directly to events associated with the task's cgroup if one is present. The per-CPU iterator storage in visit_groups_merge is sized to hold an iterator per cgroup depth, where different iterators are needed for the task's cgroup and parent cgroups. By considering the set of iterators when visiting, the lowest group_index event may be selected and the insertion order group_index property is maintained. This also allows event rotation to function correctly, as although events are grouped into a cgroup, rotation always selects the lowest group_index event to rotate (delete/insert into the tree) and the min heap of iterators makes it so that the group_index order is maintained.
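Conceptually, the tree key becomes the triple (cpu, cgroup id, group_index), compared lexicographically, with "no cgroup" sorting as id 0 ahead of every real cgroup. A compact sketch of such a comparator follows; the demo types are hypothetical, and the real perf_event_groups_less() in the diff below derives the id from cgrp->css.cgroup->kn->id:

	struct demo_key {
		int cpu;
		unsigned long long cgrp_id;	/* 0 when the event has no cgroup */
		unsigned long long group_index;
	};

	/* Returns nonzero when l sorts strictly before r. */
	static int demo_less(const struct demo_key *l, const struct demo_key *r)
	{
		if (l->cpu != r->cpu)
			return l->cpu < r->cpu;
		if (l->cgrp_id != r->cgrp_id)		/* cgroup-less (id 0) first */
			return l->cgrp_id < r->cgrp_id;
		return l->group_index < r->group_index;	/* insertion order last */
	}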
Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20190724223746.153620-3-irogers@google.com --- kernel/events/core.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8065949a865e..ccf8d4fc6374 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1577,6 +1577,30 @@ perf_event_groups_less(struct perf_event *left, struct perf_event *right) if (left->cpu > right->cpu) return false; +#ifdef CONFIG_CGROUP_PERF + if (left->cgrp != right->cgrp) { + if (!left->cgrp || !left->cgrp->css.cgroup) { + /* + * Left has no cgroup but right does, no cgroups come + * first. + */ + return true; + } + if (!right->cgrp || !right->cgrp->css.cgroup) { + /* + * Right has no cgroup but left does, no cgroups come + * first. + */ + return false; + } + /* Two dissimilar cgroups, order by id. */ + if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id) + return true; + + return false; + } +#endif + if (left->group_index < right->group_index) return true; if (left->group_index > right->group_index) @@ -1656,25 +1680,48 @@ del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx) } /* - * Get the leftmost event in the @cpu subtree. + * Get the leftmost event in the cpu/cgroup subtree. */ static struct perf_event * -perf_event_groups_first(struct perf_event_groups *groups, int cpu) +perf_event_groups_first(struct perf_event_groups *groups, int cpu, + struct cgroup *cgrp) { struct perf_event *node_event = NULL, *match = NULL; struct rb_node *node = groups->tree.rb_node; +#ifdef CONFIG_CGROUP_PERF + u64 node_cgrp_id, cgrp_id = 0; + + if (cgrp) + cgrp_id = cgrp->kn->id; +#endif while (node) { node_event = container_of(node, struct perf_event, group_node); if (cpu < node_event->cpu) { node = node->rb_left; - } else if (cpu > node_event->cpu) { + continue; + } + if (cpu > node_event->cpu) { node = node->rb_right; - } else { - match = node_event; + continue; + } +#ifdef CONFIG_CGROUP_PERF + node_cgrp_id = 0; + if (node_event->cgrp && node_event->cgrp->css.cgroup) + node_cgrp_id = node_event->cgrp->css.cgroup->kn->id; + + if (cgrp_id < node_cgrp_id) { node = node->rb_left; + continue; + } + if (cgrp_id > node_cgrp_id) { + node = node->rb_right; + continue; } +#endif + match = node_event; + node = node->rb_left; } return match; @@ -1687,12 +1734,26 @@ static struct perf_event * perf_event_groups_next(struct perf_event *event) { struct perf_event *next; +#ifdef CONFIG_CGROUP_PERF + u64 curr_cgrp_id = 0; + u64 next_cgrp_id = 0; +#endif next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node); - if (next && next->cpu == event->cpu) - return next; + if (next == NULL || next->cpu != event->cpu) + return NULL; - return NULL; +#ifdef CONFIG_CGROUP_PERF + if (event->cgrp && event->cgrp->css.cgroup) + curr_cgrp_id = event->cgrp->css.cgroup->kn->id; + + if (next->cgrp && next->cgrp->css.cgroup) + next_cgrp_id = next->cgrp->css.cgroup->kn->id; + + if (curr_cgrp_id != next_cgrp_id) + return NULL; +#endif + return next; } /* @@ -3473,6 +3534,9 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, int (*func)(struct perf_event *, void *), void *data) { +#ifdef CONFIG_CGROUP_PERF + struct cgroup_subsys_state *css = NULL; +#endif /* Space for per CPU and/or any CPU event iterators.
*/ struct perf_event *itrs[2]; struct min_heap event_heap; struct perf_event **evt; int ret; @@ -3487,6 +3551,11 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, }; lockdep_assert_held(&cpuctx->ctx.lock); + +#ifdef CONFIG_CGROUP_PERF + if (cpuctx->cgrp) + css = &cpuctx->cgrp->css; +#endif } else { event_heap = (struct min_heap){ .data = itrs, @@ -3494,11 +3563,16 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, .size = ARRAY_SIZE(itrs), }; /* Events not within a CPU context may be on any CPU. */ - __heap_add(&event_heap, perf_event_groups_first(groups, -1)); + __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL)); } evt = event_heap.data; - __heap_add(&event_heap, perf_event_groups_first(groups, cpu)); + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL)); + +#ifdef CONFIG_CGROUP_PERF + for (; css; css = css->parent) + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup)); +#endif min_heapify_all(&event_heap, &perf_min_heap); -- cgit v1.2.3 From 42bbabed09ce6208026648a71a45b4394c74585a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:00 -0800 Subject: perf tools: Add hw_idx in struct branch_stack The low level index of raw branch records for the most recent branch can be recorded in a sample with the PERF_SAMPLE_BRANCH_HW_INDEX branch_sample_type. Extend struct branch_stack to support it. However, if PERF_SAMPLE_BRANCH_HW_INDEX is not applied, only nr and entries[] will be output by the kernel. The entries[] pointer could then be wrong, since the output format differs from the new struct branch_stack. Add a variable no_hw_idx in struct perf_sample to indicate whether the hw_idx is output. Add get_branch_entry() to return the corresponding pointer to entries[0]. To keep the dummy branch sample consistent with the new branch sample layout, add hw_idx in struct dummy_branch_stack for cs-etm and intel-pt. Apply the new struct branch_stack for synthetic events as well. Extend the sample-parsing test case to support the new struct branch_stack. Committer notes: Renamed get_branch_entries() to perf_sample__branch_entries() to have proper namespacing and pave the way for this to be moved to libperf, eventually. Add 'static' to that inline as it is in a header. Add 'hw_idx' to 'struct dummy_branch_stack' in cs-etm.c to fix the build on arm64.
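For reference, the two record layouts the tool must distinguish, and the pointer arithmetic that follows from them; demo_branch_entries() is a hypothetical stand-in for the perf_sample__branch_entries() helper added in the diff below:

	/*
	 * With PERF_SAMPLE_BRANCH_HW_INDEX:    Without it:
	 *     u64 nr;                              u64 nr;
	 *     u64 hw_idx;                          struct branch_entry entries[];
	 *     struct branch_entry entries[];
	 */
	struct branch_entry;	/* opaque here; defined in util/branch.h */

	static struct branch_entry *demo_branch_entries(void *raw, int no_hw_idx)
	{
		unsigned long long *p = raw;	/* u64 view of the record */

		p++;				/* skip nr */
		if (!no_hw_idx)
			p++;			/* skip hw_idx when present */
		return (struct branch_entry *)p;
	}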
Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 70 ++++++++++++---------- tools/perf/tests/sample-parsing.c | 7 ++- tools/perf/util/branch.h | 22 +++++++ tools/perf/util/cs-etm.c | 2 + tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 5 ++ tools/perf/util/evsel.h | 5 ++ tools/perf/util/hist.c | 3 +- tools/perf/util/intel-pt.c | 2 + tools/perf/util/machine.c | 35 +++++------ .../util/scripting-engines/trace-event-python.c | 30 +++++----- tools/perf/util/session.c | 8 ++- tools/perf/util/synthetic-events.c | 6 +- 13 files changed, 125 insertions(+), 71 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e2406b291c1c..656b347f6dd8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, return 0; for (i = 0; i < br->nr; i++) { - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); @@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; thread__find_symbol_fb(thread, sample->cpumode, from, &alf); thread__find_symbol_fb(thread, sample->cpumode, to, &alt); @@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 
'A' : '-', + entries[i].flags.cycles); } return printed; @@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && !alf.map->dso->adjust_symbols) @@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str(br->entries + i), - br->entries[i].flags.in_tx ? 'X' : '-', - br->entries[i].flags.abort ? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, struct machine *machine, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 start, end; int i, insn, len, nr, ilen, printed = 0; struct perf_insn x; @@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += fprintf(fp, "%c", '\n'); /* Handle first from jump, of which we don't know the entry. */ - len = grab_bb(buffer, br->entries[nr-1].from, - br->entries[nr-1].from, + len = grab_bb(buffer, entries[nr-1].from, + entries[nr-1].from, machine, thread, &x.is64bit, &x.cpumode, false); if (len > 0) { - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, + printed += ip__fprintf_sym(entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } /* Print all blocks */ for (i = nr - 2; i >= 0; i--) { - if (br->entries[i].from || br->entries[i].to) + if (entries[i].from || entries[i].to) pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, - br->entries[i].from, - br->entries[i].to); - start = br->entries[i + 1].to; - end = br->entries[i].from; + entries[i].from, + entries[i].to); + start = entries[i + 1].to; + end = entries[i].from; len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); /* Patch up missing kernel transfers due to ring filters */ if (len == -ENXIO && i > 0) { - end = br->entries[--i].from; + end = entries[--i].from; pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); } @@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp, + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - 
off, ++insn, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); @@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * Hit the branch? In this case we are already done, and the target * has not been executed yet. */ - if (br->entries[0].from == sample->ip) + if (entries[0].from == sample->ip) goto out; - if (br->entries[0].flags.abort) + if (entries[0].flags.abort) goto out; /* @@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * between final branch and sample. When this happens just * continue walking after the last TO until we hit a branch. */ - start = br->entries[0].to; + start = entries[0].to; end = sample->ip; if (end < start) { /* Missing jump. Scan 128 bytes for the next branch */ diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2762e1155238..14239e472187 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); + COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) MCOMP(branch_stack->entries[i]); } @@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 data[64]; } branch_stack = { /* 1 branch_entry */ - .data = {1, 211, 212, 213}, + .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; @@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .transaction = 112, .raw_data = (void *)raw_data, .callchain = &callchain.callchain, + .no_hw_idx = false, .branch_stack = &branch_stack.branch_stack, .user_regs = { .abi = PERF_SAMPLE_REGS_ABI_64, @@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) if (sample_type & PERF_SAMPLE_REGS_INTR) evsel.core.attr.sample_regs_intr = sample_regs; + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + for (i = 0; i < sizeof(regs); i++) *(i + (u8 *)regs) = i & 0xfe; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 88e00d268f6f..154a05cd03af 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -12,6 +12,7 @@ #include #include #include +#include "event.h" struct branch_flags { u64 mispred:1; @@ -39,9 +40,30 @@ struct branch_entry { struct branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries[0]; }; +/* + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. + * Otherwise, the output format of a sample with branch stack is + * struct branch_stack { + * u64 nr; + * struct branch_entry entries[0]; + * } + * Check whether the hw_idx is available, + * and return the corresponding pointer of entries[0]. 
+ */ +static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) +{ + u64 *entry = (u64 *)sample->branch_stack; + + entry++; + if (sample->no_hw_idx) + return (struct branch_entry *)entry; + return (struct branch_entry *)(++entry); +} + struct branch_type_stat { bool branch_to; u64 counts[PERF_BR_MAX]; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5471045ebf5c..b3b3fe3ea345 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1172,6 +1172,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; u64 ip; @@ -1202,6 +1203,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, if (etm->synth_opts.last_branch) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 85223159737c..3cda40a2fafc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -139,6 +139,7 @@ struct perf_sample { u16 insn_len; u8 cpumode; u16 misc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c8dc4450884c..05883a45de5b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2169,7 +2169,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (data->branch_stack->nr > max_branch_nr) return -EFAULT; + sz = data->branch_stack->nr * sizeof(struct branch_entry); + if (perf_evsel__has_branch_hw_idx(evsel)) + sz += sizeof(u64); + else + data->no_hw_idx = true; OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dc14f4a823cd..99a0cb60c556 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -389,6 +389,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +{ + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + static inline bool evsel__has_callchain(const struct evsel *evsel) { return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index ca5a8f4d007e..e74a5acf66d9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, u64 *total_cycles) { struct branch_info *bi; + struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. 
*/ - if (bs && bs->nr && bs->entries[0].flags.cycles) { + if (bs && bs->nr && entries[0].flags.cycles) { int i; bi = sample__resolve_bstack(sample, al); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 33cf8928cf05..23c8289c2472 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb5c2cd44d30..fd14f1489802 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2081,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, { unsigned int i; const struct branch_stack *bs = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); - bi[i].flags = bs->entries[i].flags; + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); + bi[i].flags = entries[i].flags; } return bi; } @@ -2185,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); int lbr_nr = lbr_stack->nr, j, k; bool branch; struct branch_flags *flags; @@ -2210,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = chain->ips[j]; else if (j > i + 1) { k = j - i - 2; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } else { if (j < lbr_nr) { k = lbr_nr - j - 1; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } @@ -2281,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int max_stack) { struct branch_stack *branch = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2328,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { if (callchain_param.order == ORDER_CALLEE) { - be[i] = 
branch->entries[i]; + be[i] = entries[i]; if (chain == NULL) continue; @@ -2347,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i].from >= chain->ips[first_call] - 8) first_call++; } else - be[i] = branch->entries[branch->nr - i - 1]; + be[i] = entries[branch->nr - i - 1]; } memset(iter, 0, sizeof(struct iterations) * nr); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 80ca5d0ab7fe..8c1b27cd8b99 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; @@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); pydict_set_item_string_decref(pyelem, "from", - PyLong_FromUnsignedLongLong(br->entries[i].from)); + PyLong_FromUnsignedLongLong(entries[i].from)); pydict_set_item_string_decref(pyelem, "to", - PyLong_FromUnsignedLongLong(br->entries[i].to)); + PyLong_FromUnsignedLongLong(entries[i].to)); pydict_set_item_string_decref(pyelem, "mispred", - PyBool_FromLong(br->entries[i].flags.mispred)); + PyBool_FromLong(entries[i].flags.mispred)); pydict_set_item_string_decref(pyelem, "predicted", - PyBool_FromLong(br->entries[i].flags.predicted)); + PyBool_FromLong(entries[i].flags.predicted)); pydict_set_item_string_decref(pyelem, "in_tx", - PyBool_FromLong(br->entries[i].flags.in_tx)); + PyBool_FromLong(entries[i].flags.in_tx)); pydict_set_item_string_decref(pyelem, "abort", - PyBool_FromLong(br->entries[i].flags.abort)); + PyBool_FromLong(entries[i].flags.abort)); pydict_set_item_string_decref(pyelem, "cycles", - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; char bf[512]; @@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "to", _PyUnicode_FromString(bf)); - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "pred", _PyUnicode_FromString(bf)); - if 
(br->entries[i].flags.in_tx) { + if (entries[i].flags.in_tx) { pydict_set_item_string_decref(pyelem, "in_tx", _PyUnicode_FromString("X")); } else { @@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, _PyUnicode_FromString("-")); } - if (br->entries[i].flags.abort) { + if (entries[i].flags.abort) { pydict_set_item_string_decref(pyelem, "abort", _PyUnicode_FromString("A")); } else { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d0d7d25b23e3..055b00abd56d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) { struct ip_callchain *callchain = sample->callchain; struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 kernel_callchain_nr = callchain->nr; unsigned int i; @@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) i, callchain->ips[i]); printf("..... %2d: %016" PRIx64 "\n", - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + (int)(kernel_callchain_nr), entries[0].to); for (i = 0; i < lbr_stack->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + (int)(i + kernel_callchain_nr + 1), entries[i].from); } } @@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel, static void branch_stack__printf(struct perf_sample *sample, bool callstack) { + struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; printf("%s: nr:%" PRIu64 "\n", @@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) sample->branch_stack->nr); for (i = 0; i < sample->branch_stack->nr; i++) { - struct branch_entry *e = &sample->branch_stack->entries[i]; + struct branch_entry *e = &entries[i]; if (!callstack) { printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index c423298fe62d..dd3e6f43fb86 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1183,7 +1183,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); result += sz; } @@ -1344,7 +1345,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); memcpy(array, sample->branch_stack, sz); array = (void *)array + sz; } -- cgit v1.2.3 From d3f85437ad6a55113882d730beaa75759452da8f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:01 -0800 Subject: perf evsel: Support PERF_SAMPLE_BRANCH_HW_INDEX A new branch sample type PERF_SAMPLE_BRANCH_HW_INDEX has been introduced in latest kernel. Enable HW_INDEX by default in LBR call stack mode. If kernel doesn't support the sample type, switching it off. Add HW_INDEX in attr_fprintf as well. User can check whether the branch sample type is set via debug information or header. 
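The probe-and-fallback logic this implements can be condensed to: request the new bit optimistically, and if the open fails on an older kernel, clear only that bit and retry. A hedged sketch of the attr setup, assuming a sufficiently new linux/perf_event.h for the constants and omitting the retry plumbing itself:

	#include <string.h>
	#include <linux/perf_event.h>

	static void demo_config_lbr_callstack(struct perf_event_attr *attr,
					      int kernel_lacks_hw_idx)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		attr->sample_type = PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_BRANCH_STACK;
		attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
					   PERF_SAMPLE_BRANCH_CALL_STACK |
					   PERF_SAMPLE_BRANCH_NO_CYCLES |
					   PERF_SAMPLE_BRANCH_NO_FLAGS |
					   PERF_SAMPLE_BRANCH_HW_INDEX;

		/* Mirrors perf_missing_features.branch_hw_idx in the diff below. */
		if (kernel_lacks_hw_idx)
			attr->branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
	}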
Committer testing: First collect some samples with LBR callchains, system wide, for a few seconds: # perf record --call-graph lbr -a sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.625 MB perf.data (224 samples) ] # Now lets use 'perf evlist -v' to look at the branch_sample_type: # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES|HW_INDEX # So the machine has the kernel feature, and it was correctly added to perf_event_attr.branch_sample_type, for the default 'cycles' event. If we do it in another machine, where the kernel lacks the HW_INDEX feature, we get: # perf record --call-graph lbr -a sleep 2s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.690 MB perf.data (499 samples) ] # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES # No HW_INDEX in attr.branch_sample_type. Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 15 ++++++++++++--- tools/perf/util/evsel.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 05883a45de5b..816d930d774e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS; + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_HW_INDEX; } } else pr_warning("Cannot use LBR callstack with branch stack. " @@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel, if (param->record_mode == CALLCHAIN_LBR) { perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | - PERF_SAMPLE_BRANCH_CALL_STACK); + PERF_SAMPLE_BRANCH_CALL_STACK | + PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { perf_evsel__reset_sample_bit(evsel, REGS_USER); @@ -1673,6 +1675,8 @@ fallback_missing_features: evsel->core.attr.ksymbol = 0; if (perf_missing_features.bpf) evsel->core.attr.bpf_event = 0; + if (perf_missing_features.branch_hw_idx) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->core.attr.sample_id_all = 0; @@ -1784,7 +1788,12 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. 
*/ - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { + if (!perf_missing_features.branch_hw_idx && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 99a0cb60c556..33804740e2ca 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -119,6 +119,7 @@ struct perf_missing_features { bool ksymbol; bool bpf; bool aux_output; + bool branch_hw_idx; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 651203126c71..355d3458d4e6 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name -- cgit v1.2.3 From 277ce1efa7b504873cd32a4106654836c2f80e1b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:02 -0800 Subject: perf header: Add check for unexpected use of reserved members in event attr The perf.data may be generated by a newer version of the perf tool, which supports new input bits in the event attr, e.g. a new bit for branch_sample_type. That perf.data may later be parsed by an older version of the perf tool, which may parse it incorrectly, with no warning message for this case. The perf header code currently never checks for unknown input bits in the attr. When reading the event desc from the header, check the stored event attr. The reserved bits, sample type, read format and branch sample type will be checked. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lkml.kernel.org/r/20200228163011.19358-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4246e7447e54..acbd046bf95c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events) free(events); } +static bool perf_attr_check(struct perf_event_attr *attr) +{ + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) { + pr_warning("Reserved bits are set unexpectedly. " + "Please update perf tool.\n"); + return false; + } + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) { + pr_warning("Unknown sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->sample_type); + return false; + } + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) { + pr_warning("Unknown read format (0x%llx) is detected. 
" + "Please update perf tool.\n", + attr->read_format); + return false; + } + + if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) && + (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) { + pr_warning("Unknown branch sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->branch_sample_type); + + return false; + } + + return true; +} + static struct evsel *read_event_desc(struct feat_fd *ff) { struct evsel *evsel, *events = NULL; @@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff) memcpy(&evsel->core.attr, buf, msz); + if (!perf_attr_check(&evsel->core.attr)) + goto error; + if (do_read_u32(ff, &nr)) goto error; -- cgit v1.2.3 From 576a65b6974ddc830a89b6feb6823bd6b5914bde Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:12 +0100 Subject: perf expr: Add expr.c object Add generic expr code into new expr.c object. The expr.c object will be mainly used in following change that will get rid of the manual flex code, Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/expr.c | 19 +++++++++++++++++++ tools/perf/util/expr.y | 16 ---------------- 3 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 tools/perf/util/expr.c diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 07da6c790b63..6fdf073093a6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -122,6 +122,7 @@ perf-y += vsprintf.o perf-y += units.o perf-y += time-utils.o perf-y += expr-bison.o +perf-y += expr.o perf-y += branch.o perf-y += mem2node.o diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c new file mode 100644 index 000000000000..816b23b2068a --- /dev/null +++ b/tools/perf/util/expr.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "expr.h" + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 7d226241f1d7..7cea8b7c3a5c 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -6,7 +6,6 @@ #define IN_EXPR_Y 1 #include "expr.h" #include "smt.h" -#include #include #define MAXIDLEN 256 @@ -169,21 +168,6 @@ static int expr__lex(YYSTYPE *res, const char **pp) return tok; } -/* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) -{ - int idx; - assert(ctx->num_ids < MAX_PARSE_ID); - idx = ctx->num_ids++; - ctx->ids[idx].name = name; - ctx->ids[idx].val = val; -} - -void expr__ctx_init(struct parse_ctx *ctx) -{ - ctx->num_ids = 0; -} - static bool already_seen(const char *val, const char *one, const char **other, int num_other) { -- cgit v1.2.3 From 26226a97724d1671d553b8eb0cd95b0a5557cfb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:13 +0100 Subject: perf expr: Move expr lexer to flex Adding expr flex code instead of the manual parser code. So it's easily extensible in upcoming changes. The new flex code is in flex.l object and gets compiled like all the other flexers we use. 
It's defined as flex reentrant parser. It's used by both expr__parse and expr__find_other interfaces by separating the starting point. There's no intended change of functionality ;-) the test expr is passing. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 10 ++- tools/perf/util/expr.c | 93 +++++++++++++++++++++++++++ tools/perf/util/expr.h | 2 - tools/perf/util/expr.l | 114 +++++++++++++++++++++++++++++++++ tools/perf/util/expr.y | 169 +++++++++---------------------------------------- 5 files changed, 247 insertions(+), 141 deletions(-) create mode 100644 tools/perf/util/expr.l diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 6fdf073093a6..c0cf8dff694e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -121,6 +121,7 @@ perf-y += mem-events.o perf-y += vsprintf.o perf-y += units.o perf-y += time-utils.o +perf-y += expr-flex.o perf-y += expr-bison.o perf-y += expr.o perf-y += branch.o @@ -190,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l + $(OUTPUT)util/expr-bison.c: util/expr.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) @@ -204,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w +CFLAGS_expr-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c +$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" @@ -217,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls +CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 816b23b2068a..b39fd39f10ec 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include "expr.h" +#include "expr-bison.h" +#define YY_EXTRA_TYPE int +#include "expr-flex.h" + +#ifdef PARSER_DEBUG +extern int expr_debug; +#endif /* 
Caller must make sure id is allocated */ void expr__add_id(struct parse_ctx *ctx, const char *name, double val) @@ -17,3 +25,88 @@ void expr__ctx_init(struct parse_ctx *ctx) { ctx->num_ids = 0; } + +static int +__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, + int start) +{ + YY_BUFFER_STATE buffer; + void *scanner; + int ret; + + ret = expr_lex_init_extra(start, &scanner); + if (ret) + return ret; + + buffer = expr__scan_string(expr, scanner); + +#ifdef PARSER_DEBUG + expr_debug = 1; +#endif + + ret = expr_parse(val, ctx, scanner); + + expr__flush_buffer(buffer, scanner); + expr__delete_buffer(buffer, scanner); + expr_lex_destroy(scanner); + return ret; +} + +int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp) +{ + return __expr__parse(final_val, ctx, *pp, EXPR_PARSE); +} + +static bool +already_seen(const char *val, const char *one, const char **other, + int num_other) +{ + int i; + + if (one && !strcasecmp(one, val)) + return true; + for (i = 0; i < num_other; i++) + if (!strcasecmp(other[i], val)) + return true; + return false; +} + +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_other) +{ + int err, i = 0, j = 0; + struct parse_ctx ctx; + + expr__ctx_init(&ctx); + err = __expr__parse(NULL, &ctx, p, EXPR_OTHER); + if (err) + return -1; + + *other = malloc((ctx.num_ids + 1) * sizeof(char *)); + if (!*other) + return -ENOMEM; + + for (i = 0, j = 0; i < ctx.num_ids; i++) { + const char *str = ctx.ids[i].name; + + if (already_seen(str, one, *other, j)) + continue; + + str = strdup(str); + if (!str) + goto out; + (*other)[j++] = str; + } + (*other)[j] = NULL; + +out: + if (i != ctx.num_ids) { + while (--j) + free((char *) (*other)[i]); + free(*other); + err = -1; + } + + *num_other = j; + return err; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 046160831f90..9332796e6649 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -17,9 +17,7 @@ struct parse_ctx { void expr__ctx_init(struct parse_ctx *ctx); void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -#ifndef IN_EXPR_Y int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); -#endif int expr__find_other(const char *p, const char *one, const char ***other, int *num_other); diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l new file mode 100644 index 000000000000..1928f2a3dddc --- /dev/null +++ b/tools/perf/util/expr.l @@ -0,0 +1,114 @@ +%option prefix="expr_" +%option reentrant +%option bison-bridge + +%{ +#include +#include "expr.h" +#include "expr-bison.h" + +char *expr_get_text(yyscan_t yyscanner); +YYSTYPE *expr_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token) +{ + u64 num; + + errno = 0; + num = strtoull(str, NULL, base); + if (errno) + return EXPR_ERROR; + + yylval->num = num; + return token; +} + +static int value(yyscan_t scanner, int base) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + return __value(yylval, text, base, NUMBER); +} + +/* + * Allow @ instead of / to be able to specify pmu/event/ without + * conflicts with normal division. 
+ */ +static char *normalize(char *str) +{ + char *ret = str; + char *dst = str; + + while (*str) { + if (*str == '@') + *dst++ = '/'; + else if (*str == '\\') + *dst++ = *++str; + else + *dst++ = *str; + str++; + } + + *dst = 0x0; + return ret; +} + +static int str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + yylval->str = normalize(strdup(text)); + if (!yylval->str) + return EXPR_ERROR; + + yylval->str = normalize(yylval->str); + return token; +} +%} + +number [0-9]+ + +sch [-,=] +spec \\{sch} +sym [0-9a-zA-Z_\.:@]+ +symbol {spec}*{sym}*{spec}*{sym}* + +%% + { + int start_token; + + start_token = parse_events_get_extra(yyscanner); + + if (start_token) { + parse_events_set_extra(NULL, yyscanner); + return start_token; + } + } + +max { return MAX; } +min { return MIN; } +if { return IF; } +else { return ELSE; } +#smt_on { return SMT_ON; } +{number} { return value(yyscanner, 10); } +{symbol} { return str(yyscanner, ID); } +"|" { return '|'; } +"^" { return '^'; } +"&" { return '&'; } +"-" { return '-'; } +"+" { return '+'; } +"*" { return '*'; } +"/" { return '/'; } +"%" { return '%'; } +"(" { return '('; } +")" { return ')'; } +"," { return ','; } +. { } +%% + +int expr_wrap(void *scanner __maybe_unused) +{ + return 1; +} diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 7cea8b7c3a5c..4720cbe79357 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,5 +1,7 @@ /* Simple expression parser */ %{ +#define YYDEBUG 1 +#include #include "util.h" #include "util/debug.h" #include // strtod() @@ -8,23 +10,23 @@ #include "smt.h" #include -#define MAXIDLEN 256 %} %define api.pure full %parse-param { double *final_val } %parse-param { struct parse_ctx *ctx } -%parse-param { const char **pp } -%lex-param { const char **pp } +%parse-param {void *scanner} +%lex-param {void* scanner} %union { - double num; - char id[MAXIDLEN+1]; + double num; + char *str; } +%token EXPR_PARSE EXPR_OTHER EXPR_ERROR %token NUMBER -%token ID +%token ID %token MIN MAX IF ELSE SMT_ON %left MIN MAX IF %left '|' @@ -36,11 +38,9 @@ %type expr if_expr %{ -static int expr__lex(YYSTYPE *res, const char **pp); - -static void expr__error(double *final_val __maybe_unused, +static void expr_error(double *final_val __maybe_unused, struct parse_ctx *ctx __maybe_unused, - const char **pp __maybe_unused, + void *scanner, const char *s) { pr_debug("%s\n", s); @@ -62,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val) %} %% +start: +EXPR_PARSE all_expr +| +EXPR_OTHER all_other + +all_other: all_other other +| + +other: ID +{ + if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { + pr_err("failed: way too many variables"); + YYABORT; + } + + ctx->ids[ctx->num_ids++].name = $1; +} +| +MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' + + all_expr: if_expr { *final_val = $1; } ; @@ -92,131 +113,3 @@ expr: NUMBER ; %% - -static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) -{ - char *dst = res->id; - const char *s = p; - - if (*p == '#') - *dst++ = *p++; - - while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { - if (p - s >= MAXIDLEN) - return -1; - /* - * Allow @ instead of / to be able to specify pmu/event/ without - * conflicts with normal division. 
- */ - if (*p == '@') - *dst++ = '/'; - else if (*p == '\\') - *dst++ = *++p; - else - *dst++ = *p; - p++; - } - *dst = 0; - *pp = p; - dst = res->id; - switch (dst[0]) { - case 'm': - if (!strcmp(dst, "min")) - return MIN; - if (!strcmp(dst, "max")) - return MAX; - break; - case 'i': - if (!strcmp(dst, "if")) - return IF; - break; - case 'e': - if (!strcmp(dst, "else")) - return ELSE; - break; - case '#': - if (!strcasecmp(dst, "#smt_on")) - return SMT_ON; - break; - } - return ID; -} - -static int expr__lex(YYSTYPE *res, const char **pp) -{ - int tok; - const char *s; - const char *p = *pp; - - while (isspace(*p)) - p++; - s = p; - switch (*p++) { - case '#': - case 'a' ... 'z': - case 'A' ... 'Z': - return expr__symbol(res, p - 1, pp); - case '0' ... '9': case '.': - res->num = strtod(s, (char **)&p); - tok = NUMBER; - break; - default: - tok = *s; - break; - } - *pp = p; - return tok; -} - -static bool already_seen(const char *val, const char *one, const char **other, - int num_other) -{ - int i; - - if (one && !strcasecmp(one, val)) - return true; - for (i = 0; i < num_other; i++) - if (!strcasecmp(other[i], val)) - return true; - return false; -} - -int expr__find_other(const char *p, const char *one, const char ***other, - int *num_otherp) -{ - const char *orig = p; - int err = -1; - int num_other; - - *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); - if (!*other) - return -1; - - num_other = 0; - for (;;) { - YYSTYPE val; - int tok = expr__lex(&val, &p); - if (tok == 0) { - err = 0; - break; - } - if (tok == ID && !already_seen(val.id, one, *other, num_other)) { - if (num_other >= EXPR_MAX_OTHER - 1) { - pr_debug("Too many extra events in %s\n", orig); - break; - } - (*other)[num_other] = strdup(val.id); - if (!(*other)[num_other]) - return -1; - num_other++; - } - } - (*other)[num_other] = NULL; - *num_otherp = num_other; - if (err) { - *num_otherp = 0; - free(*other); - *other = NULL; - } - return err; -} -- cgit v1.2.3 From 58ca707636dc44c8803a098fb68129f64272f151 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:14 +0100 Subject: perf expr: Increase EXPR_MAX_OTHER to support metrics with more than 15 variables We have metrics that define more than 15 variables, like Branch_Misprediction_Cost. Increasing the allowed variables count to 20. As Andy pointed out, we can't go too high in here, because some of the code has O(n^2) complexity (already_seen) and we might want to do some other changes (like using hash tables) before increasing the maximum even more. 
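As an illustration of the quadratic cost mentioned above, here is a
stand-alone sketch (not code from this patch) of what the duplicate
check amounts to: each newly collected identifier is compared against
all earlier ones, so gathering n identifiers costs O(n^2) strcasecmp()
calls in total.

  #include <stdbool.h>
  #include <stddef.h>
  #include <strings.h>

  /* Stand-alone sketch: a linear scan per insertion -> O(n^2) overall. */
  static bool seen_before(const char *id, const char **ids, size_t n)
  {
      for (size_t i = 0; i < n; i++)
          if (!strcasecmp(ids[i], id))
              return true;
      return false;
  }

A hash table would make each lookup amortized O(1), but with at most
20 identifiers the simple scan stays cheap, hence the modest bump.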
Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9332796e6649..df0a17df0cef 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,7 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#define EXPR_MAX_OTHER 15 +#define EXPR_MAX_OTHER 20 #define MAX_PARSE_ID EXPR_MAX_OTHER struct parse_id { -- cgit v1.2.3 From 0f9b1e124bb29719eb1572db74b7893bd616f938 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:15 +0100 Subject: perf expr: Straighten expr__parse()/expr__find_other() interface Now that we have a flex parser we don't need to update the parsed string pointer, so the interface can just be passed the pointer to the expression instead of a pointer to pointer. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 6 +++--- tools/perf/util/expr.c | 8 ++++---- tools/perf/util/expr.h | 4 ++-- tools/perf/util/stat-shadow.c | 4 +--- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 87843af4c118..755d73c86c68 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, &e)) + if (expr__parse(&val, ctx, e)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -44,11 +44,11 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) return ret; p = "FOO/0"; - ret = expr__parse(&val, &ctx, &p); + ret = expr__parse(&val, &ctx, p); TEST_ASSERT_VAL("division by zero", ret == 1); p = "BAR/"; - ret = expr__parse(&val, &ctx, &p); + ret = expr__parse(&val, &ctx, p); TEST_ASSERT_VAL("missing operand", ret == 1); TEST_ASSERT_VAL("find other", diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b39fd39f10ec..45b25530db5b 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -52,9 +52,9 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, return ret; } -int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp) +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) { - return __expr__parse(final_val, ctx, *pp, EXPR_PARSE); + return __expr__parse(final_val, ctx, expr, EXPR_PARSE); } static bool @@ -71,14 +71,14 @@ already_seen(const char *val, const char *one, const char **other, return false; } -int expr__find_other(const char *p, const char *one, const char ***other, +int expr__find_other(const char *expr, const char *one, const char ***other, int *num_other) { int err, i = 0, j = 0; struct parse_ctx ctx; expr__ctx_init(&ctx); - err = __expr__parse(NULL, &ctx, p, EXPR_OTHER); + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); if (err) return -1; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index df0a17df0cef..9377538f4097 100644 --- a/tools/perf/util/expr.h +++ 
b/tools/perf/util/expr.h @@ -17,8 +17,8 @@ struct parse_ctx { void expr__ctx_init(struct parse_ctx *ctx); void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); -int expr__find_other(const char *p, const char *one, const char ***other, +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); +int expr__find_other(const char *expr, const char *one, const char ***other, int *num_other); #endif diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 90d23cc3c8d4..0fd713d3674f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -777,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - const char *p = metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; -- cgit v1.2.3 From d942815a76463fa53b81d3d1c064f76bb3f80ead Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2020 10:36:16 +0100 Subject: perf expr: Make expr__parse() return -1 on error To match the error value of the expr__find_other function, so all exported expr functions return the same values: 0 on success, -1 on error. Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: John Garry Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200228093616.67125-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 4 ++-- tools/perf/util/expr.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 755d73c86c68..28313e59d6f6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -45,11 +45,11 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) p = "FOO/0"; ret = expr__parse(&val, &ctx, p); - TEST_ASSERT_VAL("division by zero", ret == 1); + TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; ret = expr__parse(&val, &ctx, p); - TEST_ASSERT_VAL("missing operand", ret == 1); + TEST_ASSERT_VAL("missing operand", ret == -1); TEST_ASSERT_VAL("find other", expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 45b25530db5b..fd192ddf93c1 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -54,7 +54,7 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE); + return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; } static bool -- cgit v1.2.3 From 3e152aa984ff4f639f7d2daf1ad10d408c0a3332 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:52 +0800 Subject: perf block-info: Fix wrong block address comparison in block_info__cmp() Commit 6041441870ab ("perf block: Cleanup and refactor block info functions") introduces block_info__cmp(), which compares two blocks. But the issues are: 1. It should return the strcmp cmp value only if it's not 0. 2. When symbol names are matched, we need to compare the addresses of blocks further. But it wrongly uses the symbol addresses for comparison. 3. If the syms are both NULL, we can't consider these two blocks are matched. This patch fixes above 3 issues. 
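Distilled into a stand-alone sketch (minimal stand-in types, not the
real perf structures), the comparison order after the fix is: missing
symbols never compare equal, symbol names are compared first, and only
the block addresses (not the symbol addresses) break ties.

  #include <stdint.h>
  #include <string.h>

  struct sym_stub { const char *name; };                   /* stand-in */
  struct blk_stub { struct sym_stub *sym; uint64_t start, end; };

  static int64_t blk_cmp(const struct blk_stub *l, const struct blk_stub *r)
  {
      int64_t cmp;

      if (!l->sym || !r->sym)          /* issue 3: no sym => never equal */
          return !l->sym ? -1 : 1;

      cmp = strcmp(l->sym->name, r->sym->name);
      if (cmp)                         /* issue 1: propagate strcmp value */
          return cmp;

      if (l->start != r->start)        /* issue 2: block addresses, ... */
          return (int64_t)(r->start - l->start);

      return (int64_t)(r->end - l->end);   /* ... not symbol addresses */
  }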
Fixes: 6041441870ab ("perf block: Cleanup and refactor block info functions") Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-info.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index fbbb6d640dad..5b4214656e29 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -74,30 +74,21 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, if (!bi_l->sym || !bi_r->sym) { if (!bi_l->sym && !bi_r->sym) - return 0; + return -1; else if (!bi_l->sym) return -1; else return 1; } - if (bi_l->sym == bi_r->sym) { - if (bi_l->start == bi_r->start) { - if (bi_l->end == bi_r->end) - return 0; - else - return (int64_t)(bi_r->end - bi_l->end); - } else - return (int64_t)(bi_r->start - bi_l->start); - } else { - cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + if (cmp) return cmp; - } - if (bi_l->sym->start != bi_r->sym->start) - return (int64_t)(bi_r->sym->start - bi_l->sym->start); + if (bi_l->start != bi_r->start) + return (int64_t)(bi_r->start - bi_l->start); - return (int64_t)(bi_r->sym->end - bi_l->sym->end); + return (int64_t)(bi_r->end - bi_l->end); } static void init_block_info(struct block_info *bi, struct symbol *sym, -- cgit v1.2.3 From a8a9f6dc0dbfc0f4f225987abec7eb688f4b2d7e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:53 +0800 Subject: perf diff: Use __block_info__cmp() to replace block_pair_cmp() 'perf diff' uses block_pair_cmp() to compare two blocks. But block_info__cmp() has the similar functionality and it's a bit more complete. This patch removes block_pair_cmp() and uses __block_info__cmp() instead. __block_info__cmp() is wrapped by block_info__cmp() and it doesn't receives a perf_hpp_fmt parameter. 
Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 21 ++------------------- tools/perf/util/block-info.c | 9 +++++++-- tools/perf/util/block-info.h | 2 ++ 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c03c36fde7e2..5e697cd2224a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -572,29 +572,12 @@ static void init_block_hist(struct block_hist *bh) bh->valid = true; } -static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) -{ - struct block_info *bi_a = a->block_info; - struct block_info *bi_b = b->block_info; - int cmp; - - if (!bi_a->sym || !bi_b->sym) - return -1; - - cmp = strcmp(bi_a->sym->name, bi_b->sym->name); - - if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) - return 0; - - return -1; -} - static struct hist_entry *get_block_pair(struct hist_entry *he, struct hists *hists_pair) { struct rb_root_cached *root = hists_pair->entries_in; struct rb_node *next = rb_first_cached(root); - int cmp; + int64_t cmp; while (next != NULL) { struct hist_entry *he_pair = rb_entry(next, struct hist_entry, @@ -602,7 +585,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, next = rb_next(&he_pair->rb_node_in); - cmp = block_pair_cmp(he_pair, he); + cmp = __block_info__cmp(he_pair, he); if (!cmp) return he_pair; } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 5b4214656e29..25f6422c548b 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -65,8 +65,7 @@ struct block_info *block_info__new(void) return bi; } -int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, - struct hist_entry *left, struct hist_entry *right) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) { struct block_info *bi_l = left->block_info; struct block_info *bi_r = right->block_info; @@ -91,6 +90,12 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, return (int64_t)(bi_r->end - bi_l->end); } +int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return __block_info__cmp(left, right); +} + static void init_block_info(struct block_info *bi, struct symbol *sym, struct cyc_hist *ch, int offset, u64 total_cycles) diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index bef0d75e9819..a0fa9fefeaf0 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -61,6 +61,8 @@ static inline void __block_info__zput(struct block_info **bi) #define block_info__zput(bi) __block_info__zput(&bi) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); + int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right); -- cgit v1.2.3 From cca0cc76f5f56dff2c8461b551a3e1fdabcd3fba Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:54 +0800 Subject: perf block-info: Allow selecting which columns to report and its order Currently we use a predefined array to set the block info output formats, it's fixed and inflexible. 
This patch adds two parameters "block_hpps" and "nr_hpps" in block_info__create_report and other static functions, in order to let user decide which columns to report and with specified report ordering. It should be more flexible. Buffers will be allocated to contain the new fmts, of course, we need to release them before perf exits. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 21 +++++++++++++++--- tools/perf/util/block-info.c | 51 ++++++++++++++++++++++++++++++-------------- tools/perf/util/block-info.h | 7 +++++- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72a12b69f120..d7c905f7520f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -104,6 +104,7 @@ struct report { bool symbol_ipc; bool total_cycles_mode; struct block_report *block_reports; + int nr_block_reports; }; static int report__config(const char *var, const char *value, void *cb) @@ -966,8 +967,19 @@ static int __cmd_report(struct report *rep) report__output_resort(rep); if (rep->total_cycles_mode) { + int block_hpps[6] = { + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_LBR_CYCLES, + PERF_HPP_REPORT__BLOCK_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_AVG_CYCLES, + PERF_HPP_REPORT__BLOCK_RANGE, + PERF_HPP_REPORT__BLOCK_DSO, + }; + rep->block_reports = block_info__create_report(session->evlist, - rep->total_cycles); + rep->total_cycles, + block_hpps, 6, + &rep->nr_block_reports); if (!rep->block_reports) return -1; } @@ -1551,8 +1563,11 @@ error: zfree(&report.ptime_range); } - if (report.block_reports) - zfree(&report.block_reports); + if (report.block_reports) { + block_info__free_report(report.block_reports, + report.nr_block_reports); + report.block_reports = NULL; + } zstd_fini(&(session->zstd_data)); perf_session__delete(session); diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 25f6422c548b..debb8b12cf51 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -373,33 +373,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, } static void register_block_columns(struct perf_hpp_list *hpp_list, - struct block_fmt *block_fmts) + struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) - hpp_register(&block_fmts[i], i, hpp_list); + for (int i = 0; i < nr_hpps; i++) + hpp_register(&block_fmts[i], block_hpps[i], hpp_list); } -static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts) +static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { __hists__init(&bh->block_hists, &bh->block_list); perf_hpp_list__init(&bh->block_list); bh->block_list.nr_header_lines = 1; - register_block_columns(&bh->block_list, block_fmts); + register_block_columns(&bh->block_list, block_fmts, + block_hpps, nr_hpps); - perf_hpp_list__register_sort_field(&bh->block_list, - &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt); + /* Sort by the first fmt */ + perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt); } -static void process_block_report(struct hists *hists, - struct block_report *block_report, - u64 total_cycles) +static int process_block_report(struct hists 
*hists, + struct block_report *block_report, + u64 total_cycles, int *block_hpps, + int nr_hpps) { struct rb_node *next = rb_first_cached(&hists->entries); struct block_hist *bh = &block_report->hist; struct hist_entry *he; - init_block_hist(bh, block_report->fmts); + if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX) + return -1; + + block_report->nr_fmts = nr_hpps; + init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps); while (next) { he = rb_entry(next, struct hist_entry, rb_node); @@ -408,16 +416,19 @@ static void process_block_report(struct hists *hists, next = rb_next(&he->rb_node); } - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) { + for (int i = 0; i < nr_hpps; i++) { block_report->fmts[i].total_cycles = total_cycles; block_report->fmts[i].block_cycles = block_report->cycles; } hists__output_resort(&bh->block_hists, NULL); + return 0; } struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles) + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps) { struct block_report *block_reports; int nr_hists = evlist->core.nr_entries, i = 0; @@ -430,13 +441,23 @@ struct block_report *block_info__create_report(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - process_block_report(hists, &block_reports[i], total_cycles); + process_block_report(hists, &block_reports[i], total_cycles, + block_hpps, nr_hpps); i++; } + *nr_reps = nr_hists; return block_reports; } +void block_info__free_report(struct block_report *reps, int nr_reps) +{ + for (int i = 0; i < nr_reps; i++) + hists__delete_entries(&reps[i].hist.block_hists); + + free(reps); +} + int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, struct annotation_options *annotation_opts) @@ -448,13 +469,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, symbol_conf.report_individual_block = true; hists__fprintf(&bh->block_hists, true, 0, 0, min_percent, stdout, true); - hists__delete_entries(&bh->block_hists); return 0; case 1: symbol_conf.report_individual_block = true; ret = block_hists_tui_browse(bh, evsel, min_percent, env, annotation_opts); - hists__delete_entries(&bh->block_hists); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index a0fa9fefeaf0..42e9dcc4cf0a 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -45,6 +45,7 @@ struct block_report { struct block_hist hist; u64 cycles; struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX]; + int nr_fmts; }; struct block_hist; @@ -70,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, u64 *block_cycles_aggr, u64 total_cycles); struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles); + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps); + +void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, -- cgit v1.2.3 From f787feff69c466dfc6f261c9632627e383b49187 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 2 Feb 2020 22:16:55 +0800 Subject: perf block-info: Support color ops to print block percents in color It would be nice to print the block percents with colors. This patch supports the 'Sampled Cycles%' and 'Avg Cycles%' printed in colors. For example, perf record -b ... 
perf report --total-cycles or perf report --total-cycles --stdio percent > 5%, colored in red percent > 0.5%, colored in green percent < 0.5%, default color Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200202141655.32053-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-info.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index debb8b12cf51..423ec69bda6c 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -181,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt, return block_fmt->width; } +static int color_pct(struct perf_hpp *hpp, int width, double pct) +{ +#ifdef HAVE_SLANG_SUPPORT + if (use_browser) { + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", + width - 1, pct); + } +#endif + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct); +} + static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) @@ -188,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); struct block_info *bi = he->block_info; double ratio = 0.0; - char buf[16]; if (block_fmt->total_cycles) ratio = (double)bi->cycles / (double)block_fmt->total_cycles; - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, @@ -248,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_info *bi = he->block_info; double ratio = 0.0; u64 avg; - char buf[16]; if (block_fmt->block_cycles && bi->num_aggr) { avg = bi->cycles_aggr / bi->num_aggr; ratio = (double)avg / (double)block_fmt->block_cycles; } - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt, @@ -345,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, switch (idx) { case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT: - fmt->entry = block_total_cycles_pct_entry; + fmt->color = block_total_cycles_pct_entry; fmt->cmp = block_info__cmp; fmt->sort = block_total_cycles_pct_sort; break; @@ -353,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, fmt->entry = block_cycles_lbr_entry; break; case PERF_HPP_REPORT__BLOCK_CYCLES_PCT: - fmt->entry = block_cycles_pct_entry; + fmt->color = block_cycles_pct_entry; break; case PERF_HPP_REPORT__BLOCK_AVG_CYCLES: fmt->entry = block_avg_cycles_entry; -- cgit v1.2.3 From bdb04a1abbf92c998f1afb5f00a037f2edaec1f7 Mon Sep 17 00:00:00 2001 From: Tony W Wang-oc Date: Mon, 9 Mar 2020 14:06:30 +0800 Subject: x86/Kconfig: Drop vendor dependency for X86_UMIP Some Centaur family 7 CPUs and Zhaoxin family 7 CPUs support the UMIP feature too. The text size growth which UMIP adds is ~1K and distro kernels enable it anyway so remove the vendor dependency. [ bp: Rewrite commit message. 
] Signed-off-by: Tony W Wang-oc Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/1583733990-2587-1-git-send-email-TonyWWang-oc@zhaoxin.com --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index beea77046f9b..cb3633d243cb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1875,7 +1875,6 @@ config X86_SMAP config X86_UMIP def_bool y - depends on CPU_SUP_INTEL || CPU_SUP_AMD prompt "User Mode Instruction Prevention" if EXPERT ---help--- User Mode Instruction Prevention (UMIP) is a security feature in -- cgit v1.2.3 From e7950166e40271c025e0eec348cdf5c63ac734fa Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 10 Mar 2020 15:29:37 +0100 Subject: perf vendor events s390: Add new deflate counters for IBM z15 Add support for new deflate counters: - Counter 247: cycles CPU spent obtaining access to Deflate unit - Counter 252: cycles CPU is using Deflate unit - Counter 264: Increments by one for every DEFLATE CONVERSION CALL instruction executed. - Counter 265: Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2. Also adjust the some crypto counter description to latest documentation. Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20200310142937.32045-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/s390/cf_z15/crypto6.json | 8 +++--- .../perf/pmu-events/arch/s390/cf_z15/extended.json | 30 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index 5e36bc2468d0..c998e4f1d1d2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -4,27 +4,27 @@ "EventCode": "80", "EventName": "ECC_FUNCTION_COUNT", "BriefDescription": "ECC Function Count", - "PublicDescription": "Long ECC function Count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "81", "EventName": "ECC_CYCLES_COUNT", "BriefDescription": "ECC Cycles Count", - "PublicDescription": "Long ECC Function cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "82", "EventName": "ECC_BLOCKED_FUNCTION_COUNT", "BriefDescription": "Ecc Blocked Function Count", - "PublicDescription": "Long ECC blocked function count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU." }, { "Unit": "CPU-M-CF", "EventCode": "83", "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", - "PublicDescription": "Long ECC blocked cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." 
}, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 89e070727e1b..2df2e231e9ee 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -25,7 +25,7 @@ "EventCode": "131", "EventName": "DTLB2_HPAGE_WRITES", "BriefDescription": "DTLB2 One-Megabyte Page Writes", - "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page" }, { "Unit": "CPU-M-CF", @@ -356,6 +356,34 @@ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" }, + { + "Unit": "CPU-M-CF", + "EventCode": "247", + "EventName": "DFLT_ACCESS", + "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit", + "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "252", + "EventName": "DFLT_CYCLES", + "BriefDescription": "Cycles CPU is using Deflate unit", + "PublicDescription": "Cycles CPU is using Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "264", + "EventName": "DFLT_CC", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "265", + "EventName": "DFLT_CCERROR", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2" + }, { "Unit": "CPU-M-CF", "EventCode": "448", -- cgit v1.2.3 From 03fe02b113888576dc90c3e918d8e1a76b1ceb63 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:20 -0800 Subject: perf jevents: Support metric constraint A new field "MetricConstraint" is introduced in JSON event list. Extend jevents to parse the field and save the value in metric_constraint. 
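For illustration, this is roughly what a generated pmu-events table
entry looks like once the new field is parsed (the struct is abridged,
and the values are borrowed from the Page_Walks_Utilization metric
updated later in this series):

  /* Abridged sketch; the real definition is struct pmu_event. */
  struct pmu_event_sketch {
      const char *metric_expr;
      const char *metric_name;
      const char *metric_constraint;   /* new field */
  };

  static const struct pmu_event_sketch example = {
      .metric_expr       = "( ITLB_MISSES.WALK_PENDING + "
                           "DTLB_LOAD_MISSES.WALK_PENDING + "
                           "DTLB_STORE_MISSES.WALK_PENDING + "
                           "EPT.WALK_PENDING ) / ( 2 * cycles )",
      .metric_name       = "Page_Walks_Utilization",
      .metric_constraint = "NO_NMI_WATCHDOG",
  };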
Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 19 +++++++++++++------ tools/perf/pmu-events/jevents.h | 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 079c77b6a2fd..6d0f61ffee42 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); if (deprecated) fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); + if (metric_constraint) + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); fprintf(outfp, "},\n"); return 0; @@ -375,6 +377,7 @@ struct event_struct { char *metric_name; char *metric_group; char *deprecated; + char *metric_constraint; }; #define ADD_EVENT_FIELD(field) do { if (field) { \ @@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct event_struct *es; @@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, char **name, char **long_desc, char **pmu, char **filter, char **perpkg, char **unit, char **metric_expr, char **metric_name, char **metric_group, unsigned long long eventcode, - char **deprecated) + char **deprecated, char **metric_constraint) { /* try to find matching event from arch standard values */ struct event_struct *es; @@ -515,7 +518,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data) { int err; @@ -545,6 +548,7 @@ int json_events(const char *fn, char *metric_name = NULL; char *metric_group = NULL; char *deprecated = NULL; + char *metric_constraint = NULL; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -629,6 +633,8 @@ int json_events(const char *fn, addfield(map, &metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { addfield(map, &metric_group, "", "", val); + } else if (json_streq(map, field, "MetricConstraint")) { + addfield(map, &metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -670,13 +676,13 @@ int json_events(const char *fn, &long_desc, &pmu, &filter, &perpkg, &unit, &metric_expr, &metric_name, &metric_group, eventcode, - &deprecated); + &deprecated, &metric_constraint); if (err) goto free_strings; } err = func(data, name, real_event(name, event), desc, long_desc, pmu, unit, perpkg, metric_expr, metric_name, - metric_group, deprecated); + metric_group, deprecated, 
metric_constraint); free_strings: free(event); free(desc); @@ -691,6 +697,7 @@ free_strings: free(metric_expr); free(metric_name); free(metric_group); + free(metric_constraint); free(arch_std); if (err) diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 5cda49a42143..2afc8304529e 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -8,7 +8,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index caeb577d36c9..53e76d5d5b37 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -18,6 +18,7 @@ struct pmu_event { const char *metric_name; const char *metric_group; const char *deprecated; + const char *metric_constraint; }; /* -- cgit v1.2.3 From f742634ab47f59160a85ddc502418556b21953c2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:21 -0800 Subject: perf metricgroup: Factor out metricgroup__add_metric_weak_group() Factor out metricgroup__add_metric_weak_group() which add metrics into a weak group. The change can improve code readability. Because following patch will introduce a function which add standalone metrics. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 57 +++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 02aee946b6c1..1cd042cb262e 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -399,13 +399,42 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } +static void metricgroup__add_metric_weak_group(struct strbuf *events, + const char **ids, + int idnum) +{ + bool no_group = false; + int i; + + for (i = 0; i < idnum; i++) { + pr_debug("found event %s\n", ids[i]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[i], "duration_time")) { + if (i > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } + strbuf_addf(events, "%s%s", + i == 0 || no_group ? 
"{" : ",", + ids[i]); + no_group = false; + } + if (!no_group) + strbuf_addf(events, "}:W"); +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; - int ret = -EINVAL; - int i, j; + int i, ret = -EINVAL; if (!map) return 0; @@ -422,7 +451,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; - bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -431,27 +459,8 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, continue; if (events->len > 0) strbuf_addf(events, ","); - for (j = 0; j < idnum; j++) { - pr_debug("found event %s\n", ids[j]); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(ids[j], "duration_time")) { - if (j > 0) - strbuf_addf(events, "}:W,"); - strbuf_addf(events, "duration_time"); - no_group = true; - continue; - } - strbuf_addf(events, "%s%s", - j == 0 || no_group ? "{" : ",", - ids[j]); - no_group = false; - } - if (!no_group) - strbuf_addf(events, "}:W"); + + metricgroup__add_metric_weak_group(events, ids, idnum); eg = malloc(sizeof(struct egroup)); if (!eg) { -- cgit v1.2.3 From 2a14c1bf017f48a17c8c0ba26a22625363e77cc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:22 -0800 Subject: perf util: Factor out sysctl__nmi_watchdog_enabled() The NMI watchdog status is required for metric group constraint examination. Factor out sysctl__nmi_watchdog_enabled() to retrieve the NMI watchdog status. Users may count more than one metric group each time. If so, the NMI watchdog status may be retrieved several times. To reduce the overhead, cache the NMI watchdog status. Replace the NMI watchdog status checking in print_footer() by sysctl__nmi_watchdog_enabled(). Suggested-by: Andi Kleen Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-4-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 ++---- tools/perf/util/util.c | 18 ++++++++++++++++++ tools/perf/util/util.h | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d89cb0da90f8..76c6052b12e2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -16,6 +16,7 @@ #include #include "cgroup.h" #include +#include "util.h" #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" @@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config) { double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; - int n; if (!config->null_run) fprintf(output, "\n"); @@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config) } fprintf(output, "\n\n"); - if (config->print_free_counters_hint && - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && - n > 0) + if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled()) fprintf(output, "Some events weren't counted. 
Try disabling the NMI watchdog:\n" " echo 0 > /proc/sys/kernel/nmi_watchdog\n" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 969ae560dad9..d707c9624dd9 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -55,6 +55,24 @@ int sysctl__max_stack(void) return sysctl_perf_event_max_stack; } +bool sysctl__nmi_watchdog_enabled(void) +{ + static bool cached; + static bool nmi_watchdog; + int value; + + if (cached) + return nmi_watchdog; + + if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0) + return false; + + nmi_watchdog = (value > 0) ? true : false; + cached = true; + + return nmi_watchdog; +} + bool test_attr__enabled; bool perf_host = true; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9969b8b46f7c..f486fdd3a538 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -29,6 +29,8 @@ size_t hex_width(u64 v); int sysctl__max_stack(void); +bool sysctl__nmi_watchdog_enabled(void); + int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) -- cgit v1.2.3 From ab483d8bc8acb83f0103bc38ef8f2c27d98ffd1b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:23 -0800 Subject: perf metricgroup: Support metric constraint Some metric groups have metric constraints. A metric group can be scheduled as a group only when some constraints are applied. For example, Page_Walks_Utilization has a metric constraint, "NO_NMI_WATCHDOG". When NMI watchdog is disabled, the metric group can be scheduled as a group. Otherwise, splitting the metric group into standalone metrics. Add a new function, metricgroup__has_constraint(), to check whether all constraints are applied. If not, splitting the metric group into standalone metrics. Currently, only one constraint, "NO_NMI_WATCHDOG", is checked. Print a warning for the metric group with the constraint, when NMI WATCHDOG is enabled. 
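To make the two emission modes concrete, here is a runnable sketch
(the emit() helper is hypothetical; the event names are borrowed from
the Page_Walks_Utilization example later in this series):

  #include <stdbool.h>
  #include <stdio.h>

  /* Hypothetical helper: emit a metric's events either as one weak
   * group ("{a,b}:W") or, when a constraint such as NO_NMI_WATCHDOG
   * is violated, as standalone events ("a,b"). */
  static void emit(bool standalone, const char **ids, int n)
  {
      for (int i = 0; i < n; i++)
          printf("%s%s", i ? "," : (standalone ? "" : "{"), ids[i]);
      printf("%s\n", standalone ? "" : "}:W");
  }

  int main(void)
  {
      const char *ids[] = { "itlb_misses.walk_pending", "cycles" };

      emit(false, ids, 2);   /* -> {itlb_misses.walk_pending,cycles}:W */
      emit(true, ids, 2);    /* -> itlb_misses.walk_pending,cycles */
      return 0;
  }

The weak group lets the kernel schedule the events together when it
can; the standalone form lets them be multiplexed individually instead
of failing as a group while the NMI watchdog holds a counter.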
Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-5-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 54 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1cd042cb262e..c3a8c701609a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include "util.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -429,6 +431,49 @@ static void metricgroup__add_metric_weak_group(struct strbuf *events, strbuf_addf(events, "}:W"); } +static void metricgroup__add_metric_non_group(struct strbuf *events, + const char **ids, + int idnum) +{ + int i; + + for (i = 0; i < idnum; i++) + strbuf_addf(events, ",%s", ids[i]); +} + +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; + + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } + + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(struct pmu_event *pe) +{ + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { @@ -460,7 +505,10 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, if (events->len > 0) strbuf_addf(events, ","); - metricgroup__add_metric_weak_group(events, ids, idnum); + if (metricgroup__has_constraint(pe)) + metricgroup__add_metric_non_group(events, ids, idnum); + else + metricgroup__add_metric_weak_group(events, ids, idnum); eg = malloc(sizeof(struct egroup)); if (!eg) { @@ -502,6 +550,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, } } free(nlist); + + if (!ret) + metricgroup___watchdog_constraint_hint(NULL, true); + return ret; } -- cgit v1.2.3 From b95fcd2c1c25bd14f55d5d6ab268b3ab00b8a774 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 24 Feb 2020 13:59:24 -0800 Subject: perf vendor events intel: Add NO_NMI_WATCHDOG metric constraint Add NO_NMI_WATCHDOG metric constraint to Page_Walks_Utilization for Sky Lake and Cascade Lake. 
Committer testing: On a Lenovo T480S, Intel(R) Core(TM) i7-8650U Kaby Lake, that looking at x86's mapfile.csv file is a: $ grep -w skylake tools/perf/pmu-events/arch/x86/mapfile.csv GenuineIntel-6-[4589]E,v24,skylake,core $ So uses the constraint added in this patch in this file: tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json Before: # perf stat -a -M Page_Walks_Utilization sleep 2 Performance counter stats for 'system wide': itlb_misses.walk_pending (0.00%) dtlb_load_misses.walk_pending (0.00%) dtlb_store_misses.walk_pending (0.00%) ept.walk_pending (0.00%) cycles (0.00%) 2.001750514 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog The events in group usually have to be from the same PMU. Try reorganizing the group. # After: # perf stat -a -M Page_Walks_Utilization sleep 2 Splitting metric group Page_Walks_Utilization into standalone metrics. Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog , Performance counter stats for 'system wide': 36,883,102 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization (79.99%) 123,104,146 dtlb_load_misses.walk_pending (80.02%) 13,720,795 dtlb_store_misses.walk_pending (79.99%) 0 ept.walk_pending (79.99%) 1,519,948,400 cycles (80.01%) 2.002170780 seconds time elapsed # Before and after, if we disable the nmi_watchdog we get: # echo 0 > /proc/sys/kernel/nmi_watchdog # perf stat -a -M Page_Walks_Utilization sleep 2 Performance counter stats for 'system wide': 33,721,658 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization 84,070,996 dtlb_load_misses.walk_pending 9,816,071 dtlb_store_misses.walk_pending 0 ept.walk_pending 704,920,899 cycles 2.002331670 seconds time elapsed # More information about the metric expressions: # perf stat -v -a -M Page_Walks_Utilization sleep 2 Using CPUID GenuineIntel-6-8E-A metric expr ( itlb_misses.walk_pending + dtlb_load_misses.walk_pending + dtlb_store_misses.walk_pending + ept.walk_pending ) / ( 2 * cycles ) for Page_Walks_Utilization found event itlb_misses.walk_pending found event dtlb_load_misses.walk_pending found event dtlb_store_misses.walk_pending found event ept.walk_pending found event cycles adding {itlb_misses.walk_pending,dtlb_load_misses.walk_pending,dtlb_store_misses.walk_pending,ept.walk_pending,cycles}:W -> cpu/umask=0x10,(null)=0x186a3,event=0x85/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x8/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x49/ -> cpu/umask=0x10,(null)=0x1e8483,event=0x4f/ itlb_misses.walk_pending: 8085772 16010162799 16010162799 dtlb_load_misses.walk_pending: 28134579 16010162799 16010162799 dtlb_store_misses.walk_pending: 7276535 16010162799 16010162799 ept.walk_pending: 2 16010162799 16010162799 cycles: 315140605 16010162799 16010162799 Performance counter stats for 'system wide': 8,085,772 itlb_misses.walk_pending # 0.1 Page_Walks_Utilization 28,134,579 dtlb_load_misses.walk_pending 7,276,535 dtlb_store_misses.walk_pending 2 ept.walk_pending 315,140,605 cycles 2.002333181 seconds time elapsed # Signed-off-by: Kan Liang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1582581564-184429-6-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- 
 tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 3 ++-
 tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json      | 3 ++-
 tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json     | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index f94653229dd4..a728c6e5119b 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index e7feb60f9fa9..f97e8316ad2f 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 21d7a0c2c2e8..35f5db1786f7 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -215,7 +215,8 @@
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization"
+        "MetricName": "Page_Walks_Utilization",
+        "MetricConstraint": "NO_NMI_WATCHDOG"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
--
cgit v1.2.3


From b8fdcfb5a17f1d4fd503caef5c457005a765ecd7 Mon Sep 17 00:00:00 2001
From: disconnect3d
Date: Mon, 9 Mar 2020 11:48:53 +0100
Subject: perf map: Fix off by one in strncmp() size argument

This patch fixes an off-by-one error in a strncmp() size argument in
tools/perf/util/map.c. The issue is that in:

  strncmp(filename, "/system/lib/", 11)

the passed string literal "/system/lib/" has 12 bytes (without the NULL
byte) while the passed size argument is 11. As a result, the logic won't
match the ending "/" byte and will pass filepaths that are stored in
other directories, e.g. "/system/libmalicious/bin" or just
"/system/libmalicious".
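To make the boundary problem concrete, here is a minimal standalone C
sketch (not part of the patch; the "malicious" path is a made-up
example) showing that the 11-byte comparison accepts a sibling
directory while the 12-byte one rejects it:

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          const char *path = "/system/libmalicious/bin";

          /* Old check: compares just "/system/lib" (11 bytes), so it matches. */
          printf("n=11: %s\n", !strncmp(path, "/system/lib/", 11) ? "match" : "no match");

          /* Fixed check: includes the trailing '/' (12 bytes), so it does not. */
          printf("n=12: %s\n", !strncmp(path, "/system/lib/", 12) ? "match" : "no match");

          return 0;
  }

Running this prints "n=11: match" and "n=12: no match".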
This functionality seems to be present only on Android. I assume the
/system/ directory is only writable by the root user, so I don't think
this bug has much (or any) security impact.

Fixes: eca818369996 ("perf tools: Add automatic remapping of Android libraries")
Signed-off-by: disconnect3d
Cc: Alexander Shishkin
Cc: Changbin Du
Cc: Jiri Olsa
Cc: John Keeping
Cc: Mark Rutland
Cc: Michael Lentine
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Song Liu
Cc: Stephane Eranian
Link: http://lore.kernel.org/lkml/20200309104855.3775-1-dominik.b.czarnota@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 95428511300d..b342f744b1fc 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
 		return true;
 	}

-	if (!strncmp(filename, "/system/lib/", 11)) {
+	if (!strncmp(filename, "/system/lib/", 12)) {
 		char *ndk, *app;
 		const char *arch;
 		size_t ndk_length;
--
cgit v1.2.3


From bdadd647cbf7b6e7f5d5891bd4e711292793cf23 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Mon, 9 Mar 2020 16:53:41 -0300
Subject: perf map: Use strstarts() to look for Android libraries

And add the '/' to avoid looking at things like "/system/libsomething",
when all we want to know is whether it is like "/system/lib/something",
i.e. whether it is in that system library dir.

Using strstarts() avoids off-by-one errors like the one recently fixed
in this file.

Since this adds the '/', I separated this patch; another patch will
make this consistent by removing other strncmp(str, prefix, manually
calculated prefix length) usage.

Reported-by: Dominik Czarnota
Acked-by: Dominik Czarnota
Cc: Adrian Hunter
Cc: Jiri Olsa
Cc: Namhyung Kim
Link: http://lore.kernel.org/lkml/CABEVAa0_q-uC0vrrqpkqRHy_9RLOSXOJxizMLm1n5faHRy2AeA@mail.gmail.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/map.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b342f744b1fc..53d96611e6a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename)

 static inline int is_android_lib(const char *filename)
 {
-	return !strncmp(filename, "/data/app-lib", 13) ||
-	       !strncmp(filename, "/system/lib", 11);
+	return strstarts(filename, "/data/app-lib/") ||
+	       strstarts(filename, "/system/lib/");
 }

 static inline bool replace_android_lib(const char *filename, char *newfilename)
@@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)

 	app_abi_length = strlen(app_abi);

-	if (!strncmp(filename, "/data/app-lib", 13)) {
+	if (strstarts(filename, "/data/app-lib/")) {
 		char *apk_path;

 		if (!app_abi_length)
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
 		return true;
 	}

-	if (!strncmp(filename, "/system/lib/", 12)) {
+	if (strstarts(filename, "/system/lib/")) {
 		char *ndk, *app;
 		const char *arch;
 		size_t ndk_length;
--
cgit v1.2.3


From d01751563caf0dec7be36f81de77cc0197b77e59 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Wed, 19 Feb 2020 10:18:07 +0800
Subject: perf cs-etm: Swap packets for instruction samples

If the option '--itrace=iNNN' is used with Arm CoreSight trace data,
the perf tool fails to inject instruction samples; the root cause is
that the packets are only swapped for branch samples and last branches,
but not for
instruction samples, so the newly arrived packets cannot be handled
properly when only instruction samples are synthesized.

To fix this issue, this patch refactors the code with a new function,
cs_etm__packet_swap(), which is used to swap packets, and adds the
condition for instruction samples.

Signed-off-by: Leo Yan
Reviewed-by: Mathieu Poirier
Reviewed-by: Mike Leach
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Robert Walker
Cc: Suzuki Poulouse
Cc: coresight ml
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200219021811.20067-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b3b3fe3ea345..294b09cfb034 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -363,6 +363,23 @@ struct cs_etm_packet_queue
 	return NULL;
 }

+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+				struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_packet *tmp;
+
+	if (etm->sample_branches || etm->synth_opts.last_branch ||
+	    etm->sample_instructions) {
+		/*
+		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+		 * the next incoming packet.
+		 */
+		tmp = tidq->packet;
+		tidq->packet = tidq->prev_packet;
+		tidq->prev_packet = tmp;
+	}
+}
+
 static void cs_etm__packet_dump(const char *pkt_string)
 {
 	const char *color = PERF_COLOR_BLUE;
@@ -1342,7 +1359,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 			  struct cs_etm_traceid_queue *tidq)
 {
 	struct cs_etm_auxtrace *etm = etmq->etm;
-	struct cs_etm_packet *tmp;
 	int ret;
 	u8 trace_chan_id = tidq->trace_chan_id;
 	u64 instrs_executed = tidq->packet->instr_count;
@@ -1406,15 +1422,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 		}
 	}

-	if (etm->sample_branches || etm->synth_opts.last_branch) {
-		/*
-		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
-		 * the next incoming packet.
-		 */
-		tmp = tidq->packet;
-		tidq->packet = tidq->prev_packet;
-		tidq->prev_packet = tmp;
-	}
+	cs_etm__packet_swap(etm, tidq);

 	return 0;
 }
@@ -1443,7 +1451,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 {
 	int err = 0;
 	struct cs_etm_auxtrace *etm = etmq->etm;
-	struct cs_etm_packet *tmp;

 	/* Handle start tracing packet */
 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
@@ -1478,15 +1485,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 	}

 swap_packet:
-	if (etm->sample_branches || etm->synth_opts.last_branch) {
-		/*
-		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
-		 * the next incoming packet.
-		 */
-		tmp = tidq->packet;
-		tidq->packet = tidq->prev_packet;
-		tidq->prev_packet = tmp;
-	}
+	cs_etm__packet_swap(etm, tidq);

 	return err;
 }
--
cgit v1.2.3


From f1410028c762893daf353765112cf6797e4442fa Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Wed, 19 Feb 2020 10:18:08 +0800
Subject: perf cs-etm: Continuously record last branch

Every time an instruction sample is synthesized, the last branch
recording is reset. This is fine if the instruction period is big
enough; for example, with the option '--itrace=i100000' the last branch
array is reset for every sample at 100000 instructions per period, and
before the next instruction sample is generated there are sufficient
packets coming in to fill the last branch array.
On the other hand, if a very small period is set, far fewer packets
arrive between two consecutive instruction samples, so the frequent
resetting leaves the last branch array almost empty for each new
instruction sample.

To allow last branches to work properly for any instruction period,
this patch avoids resetting the last branch for every instruction
sample and only resets it when flushing the trace data. The last
branches are now reset in only two cases: when the trace starts and
when the trace is discontinuous; in all other cases last branches keep
being recorded across consecutive instruction samples.

Signed-off-by: Leo Yan
Reviewed-by: Mathieu Poirier
Reviewed-by: Mike Leach
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Robert Walker
Cc: Suzuki Poulouse
Cc: coresight ml
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200219021811.20067-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 294b09cfb034..2c4156c5ed09 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1170,9 +1170,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
 			ret);

-	if (etm->synth_opts.last_branch)
-		cs_etm__reset_last_branch_rb(tidq);
-
 	return ret;
 }

@@ -1487,6 +1484,10 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 swap_packet:
 	cs_etm__packet_swap(etm, tidq);

+	/* Reset last branches after flushing the trace */
+	if (etm->synth_opts.last_branch)
+		cs_etm__reset_last_branch_rb(tidq);
+
 	return err;
 }
--
cgit v1.2.3


From c9f5baa136777b2c982f6f7a90c9da69a88be148 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Wed, 19 Feb 2020 10:18:09 +0800
Subject: perf cs-etm: Correct synthesizing instruction samples

When 'etm->instructions_sample_period' is less than
'tidq->period_instructions', the logic in cs_etm__sample() cannot
handle this case properly. Take the following flow as an example:

- If we set the itrace option '--itrace=i4', cs_etm__sample() starts
  with the initialized values:

    tidq->period_instructions = 0
    etm->instructions_sample_period = 4

- When the first packet comes in with packet->instr_count = 10, i.e. 10
  instructions executed in this packet, period_instructions is updated
  as below:

    tidq->period_instructions = 0 + 10 = 10
    instrs_over = 10 - 4 = 6
    offset = 10 - 6 - 1 = 3
    tidq->period_instructions = instrs_over = 6

- When the second packet comes in, again with packet->instr_count = 10
  instructions in the trace sample:

    tidq->period_instructions = 6 + 10 = 16
    instrs_over = 16 - 4 = 12
    offset = 10 - 12 - 1 = -3  -> a negative value
    tidq->period_instructions = instrs_over = 12

After handling these two packets, there are the following issues.

The first issue is that cs_etm__instr_addr() returns the address,
within the current trace sample, of the instruction related to offset,
so offset is supposed to always be an unsigned value. But in fact
cs_etm__sample() may calculate a negative offset (when handling the
second packet, the offset is -3) and pass it to cs_etm__instr_addr()
as a u64, where it becomes a huge positive integer.

The second issue is that only 2 samples are synthesized for sample
period = 4.
In theory, every packet has 10 instructions, so the two packets total
20 instructions and should generate 5 samples (4 x 5 = 20). The
shortfall is because cs_etm__sample() calls
cs_etm__synth_instruction_sample() only once per range packet.

This patch fixes the logic in cs_etm__sample(); the basic idea for
handling an incoming packet is:

- To synthesize the first instruction sample, combine the instructions
  left over from the previous packet with the head of the new packet;
  then generate consecutive samples, one per sample period;

- If instructions remain at the tail of the new packet, leave them for
  the subsequent sample.

Suggested-by: Mike Leach
Signed-off-by: Leo Yan
Reviewed-by: Mathieu Poirier
Reviewed-by: Mike Leach
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Robert Walker
Cc: Suzuki Poulouse
Cc: coresight ml
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200219021811.20067-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 87 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 70 insertions(+), 17 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 2c4156c5ed09..1ddcc67e13dd 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1358,9 +1358,12 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	int ret;
 	u8 trace_chan_id = tidq->trace_chan_id;
-	u64 instrs_executed = tidq->packet->instr_count;
+	u64 instrs_prev;

-	tidq->period_instructions += instrs_executed;
+	/* Get instructions remainder from previous packet */
+	instrs_prev = tidq->period_instructions;
+
+	tidq->period_instructions += tidq->packet->instr_count;

 	/*
 	 * Record a branch when the last instruction in
@@ -1378,26 +1381,76 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 		 * TODO: allow period to be defined in cycles and clock time
 		 */

-		/* Get number of instructions executed after the sample point */
-		u64 instrs_over = tidq->period_instructions -
-			etm->instructions_sample_period;
+		/*
+		 * Below diagram demonstrates the instruction samples
+		 * generation flows:
+		 *
+		 *    Instrs     Instrs       Instrs       Instrs
+		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
+		 *    |            |            |            |
+		 *    V            V            V            V
+		 *   --------------------------------------------------
+		 *            ^                                  ^
+		 *            |                                  |
+		 *         Period                             Period
+		 *    instructions(Pi)                   instructions(Pi')
+		 *
+		 *            |                                  |
+		 *            \---------------- -----------------/
+		 *                             V
+		 *                 tidq->packet->instr_count
+		 *
+		 * Instrs Sample(n...) are the synthesised samples occurring
+		 * every etm->instructions_sample_period instructions - as
+		 * defined on the perf command line. Sample(n) is the last
+		 * sample before the current etm packet; samples n+1 to n+3
+		 * are generated from the current etm packet.
+		 *
+		 * tidq->packet->instr_count represents the number of
+		 * instructions in the current etm packet.
+		 *
+		 * Period instructions (Pi) contains the number of
+		 * instructions executed after the sample point(n) from the
+		 * previous etm packet. This will always be less than
+		 * etm->instructions_sample_period.
+		 *
+		 * When generating new samples, the tail of the old packet is
+		 * combined with the head of the new incoming packet to
+		 * generate sample(n+1); sample(n+2) and sample(n+3) consume
+		 * the instructions with the sample period.
+		 * After sample(n+3), the remaining instructions will be used
+		 * by a later packet and are assigned to
+		 * tidq->period_instructions for the next round of
+		 * calculation.
+		 */

 		/*
-		 * Calculate the address of the sampled instruction (-1 as
-		 * sample is reported as though instruction has just been
-		 * executed, but PC has not advanced to next instruction)
+		 * Get the initial offset into the current packet instructions;
+		 * entry conditions ensure that instrs_prev is less than
+		 * etm->instructions_sample_period.
 		 */
-		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
-					      tidq->packet, offset);
+		u64 offset = etm->instructions_sample_period - instrs_prev;
+		u64 addr;

-		ret = cs_etm__synth_instruction_sample(
-			etmq, tidq, addr, etm->instructions_sample_period);
-		if (ret)
-			return ret;
+		while (tidq->period_instructions >=
+				etm->instructions_sample_period) {
+			/*
+			 * Calculate the address of the sampled instruction (-1
+			 * as sample is reported as though instruction has just
+			 * been executed, but PC has not advanced to next
+			 * instruction)
+			 */
+			addr = cs_etm__instr_addr(etmq, trace_chan_id,
+						  tidq->packet, offset - 1);
+			ret = cs_etm__synth_instruction_sample(
+				etmq, tidq, addr,
+				etm->instructions_sample_period);
+			if (ret)
+				return ret;

-		/* Carry remaining instructions into next sample period */
-		tidq->period_instructions = instrs_over;
+			offset += etm->instructions_sample_period;
+			tidq->period_instructions -=
+				etm->instructions_sample_period;
+		}
 	}

 	if (etm->sample_branches) {
--
cgit v1.2.3


From 695378b567df1fe6631c6684fcc9eeb4257df70f Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Wed, 19 Feb 2020 10:18:10 +0800
Subject: perf cs-etm: Optimize copying last branches

If an instruction range packet can generate multiple instruction
samples, these samples share the same last branches; it's not necessary
to copy the same last branches repeatedly for these samples within the
same packet.

This patch moves the copying of last branches out of
cs_etm__synth_instruction_sample() and executes it prior to generating
instruction samples.
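The shape of the change is hoisting a loop-invariant copy out of the
per-sample loop. Here is a runnable toy sketch of the call order after
the patch (the two helpers are stand-ins for the real cs-etm functions,
and the numbers are invented):

  #include <stdio.h>

  /* Stand-ins for cs_etm__copy_last_branch_rb() and
   * cs_etm__synth_instruction_sample(). */
  static void copy_last_branch_rb(void)       { puts("copy last-branch snapshot"); }
  static void synth_instruction_sample(void)  { puts("synthesize sample"); }

  int main(void)
  {
          unsigned long long period_instructions = 12;
          const unsigned long long sample_period = 4;

          /* After the patch: one copy per range packet... */
          copy_last_branch_rb();

          /* ...shared by all samples synthesized from that packet. */
          while (period_instructions >= sample_period) {
                  synth_instruction_sample();
                  period_instructions -= sample_period;
          }
          return 0;
  }

With the pre-patch placement, the copy would sit inside the loop and run
three times here instead of once.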
Signed-off-by: Leo Yan
Reviewed-by: Mathieu Poirier
Reviewed-by: Mike Leach
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Robert Walker
Cc: Suzuki Poulouse
Cc: coresight ml
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200219021811.20067-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 1ddcc67e13dd..87d9943177bc 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1151,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,

 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

-	if (etm->synth_opts.last_branch) {
-		cs_etm__copy_last_branch_rb(etmq, tidq);
+	if (etm->synth_opts.last_branch)
 		sample.branch_stack = tidq->last_branch;
-	}

 	if (etm->synth_opts.inject) {
 		ret = cs_etm__inject_event(event, &sample,
@@ -1431,6 +1429,10 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
 		u64 offset = etm->instructions_sample_period - instrs_prev;
 		u64 addr;

+		/* Prepare last branches for instruction sample */
+		if (etm->synth_opts.last_branch)
+			cs_etm__copy_last_branch_rb(etmq, tidq);
+
 		while (tidq->period_instructions >=
 				etm->instructions_sample_period) {
 			/*
@@ -1508,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,

 	if (etmq->etm->synth_opts.last_branch &&
 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
 		/*
 		 * Generate a last branch event for the branches left in the
 		 * circular buffer at the end of the trace.
@@ -1515,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
 		 * Use the address of the end of the last reported execution
 		 * range
 		 */
-		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);

 		err = cs_etm__synth_instruction_sample(
 			etmq, tidq, addr,
@@ -1560,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
 	 */
 	if (etmq->etm->synth_opts.last_branch &&
 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
 		/*
 		 * Use the address of the end of the last reported execution
 		 * range.
 		 */
-		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);

 		err = cs_etm__synth_instruction_sample(
 			etmq, tidq, addr,
--
cgit v1.2.3


From bc010dd657ee0309276c88ab828b9ad156f75b31 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Wed, 19 Feb 2020 10:18:11 +0800
Subject: perf cs-etm: Fix unsigned variable comparison to zero

The variable 'offset' in cs_etm__sample() is of type u64; it's not
appropriate to check it with 'while (offset > 0)', so this patch
changes the condition to 'while (offset)'.
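For an unsigned type, 'offset > 0' and 'offset' test exactly the same
thing, so this is a readability fix; the genuinely dangerous spelling
would be 'offset >= 0', which is always true. A small standalone
illustration (not from the patch):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          uint64_t offset = 3;

          /* Equivalent to 'while (offset > 0)' for an unsigned type. */
          while (offset)
                  offset--;

          /* 'while (offset >= 0)' would never terminate: an unsigned
           * value cannot go negative, it wraps around instead. */
          offset--;
          printf("0 - 1 as u64 = 0x%llx\n", (unsigned long long)offset);
          return 0;
  }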
Signed-off-by: Leo Yan
Reviewed-by: Mathieu Poirier
Reviewed-by: Mike Leach
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Robert Walker
Cc: Suzuki Poulouse
Cc: coresight ml
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200219021811.20067-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 87d9943177bc..62d2f9b9ce1b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -962,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
 	if (packet->isa == CS_ETM_ISA_T32) {
 		u64 addr = packet->start_addr;

-		while (offset > 0) {
+		while (offset) {
 			addr += cs_etm__t32_instr_size(etmq,
 						       trace_chan_id, addr);
 			offset--;
--
cgit v1.2.3


From 0c2d041232411c8124136c9497c0e352dcf18baa Mon Sep 17 00:00:00 2001
From: Ian Rogers
Date: Tue, 10 Mar 2020 22:21:10 -0700
Subject: perf doc: Set man page date to last git commit

Currently the man page dates reflect the date the man pages were built.
This patch adjusts the date so that it is the date of the last commit
against the man page. The date is generated using 'git log'.

Committer testing:

  $ git log -1 --pretty="format:%cd" --date=short tools/perf/Documentation/perf-top.txt
  2020-01-14

Before:

  rm -rf /tmp/build/perf
  mkdir -p /tmp/build/perf
  make -C tools/perf O=/tmp/build/perf/ install
  $ date
  Wed 11 Mar 2020 10:21:19 AM -03
  $ man perf-top | tail -1
  perf                            03/11/2020                        PERF-TOP(1)
  $

After:

  rm -rf /tmp/build/perf
  mkdir -p /tmp/build/perf
  make -C tools/perf O=/tmp/build/perf/ install
  $ date
  Wed 11 Mar 2020 10:24:06 AM -03
  $ man perf-top | tail -1
  perf                            2020-01-14                        PERF-TOP(1)
  $

Signed-off-by: Ian Rogers
Tested-by: Arnaldo Carvalho de Melo
Cc: Alexander Shishkin
Cc: Greg Kroah-Hartman
Cc: Jiri Olsa
Cc: Mark Rutland
Cc: Masanari Iida
Cc: Mukesh Ojha
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Link: http://lore.kernel.org/lkml/20200311052110.23132-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index adc5a7e44b98..31824d5269cc 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 $(OUTPUT)%.xml : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
 	$(ASCIIDOC) -b docbook -d manpage \
-		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
+		-aperf_date=$(shell git log -1 --pretty="format:%cd" \
+			--date=short $<) \
+		-o $@+ $< && \
 	mv $@+ $@

 XSLT = docbook.xsl
--
cgit v1.2.3


From 97256d1a2a62390077bc72009628af5be44fd8a9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Wed, 11 Mar 2020 14:20:32 +0200
Subject: perf intel-pt: Rename intel-pt.txt and put it in man page format

Make the Intel PT documentation into a man page.
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200311122034.3697-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 991 --------------------------- tools/perf/Documentation/perf-intel-pt.txt | 1000 ++++++++++++++++++++++++++++ 2 files changed, 1000 insertions(+), 991 deletions(-) delete mode 100644 tools/perf/Documentation/intel-pt.txt create mode 100644 tools/perf/Documentation/perf-intel-pt.txt diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt deleted file mode 100644 index 2cf2d9e9d0da..000000000000 --- a/tools/perf/Documentation/intel-pt.txt +++ /dev/null @@ -1,991 +0,0 @@ -Intel Processor Trace -===================== - -Overview -======== - -Intel Processor Trace (Intel PT) is an extension of Intel Architecture that -collects information about software execution such as control flow, execution -modes and timings and formats it into highly compressed binary packets. -Technical details are documented in the Intel 64 and IA-32 Architectures -Software Developer Manuals, Chapter 36 Intel Processor Trace. - -Intel PT is first supported in Intel Core M and 5th generation Intel Core -processors that are based on the Intel micro-architecture code name Broadwell. - -Trace data is collected by 'perf record' and stored within the perf.data file. -See below for options to 'perf record'. - -Trace data must be 'decoded' which involves walking the object code and matching -the trace data packets. For example a TNT packet only tells whether a -conditional branch was taken or not taken, so to make use of that packet the -decoder must know precisely which instruction was being executed. - -Decoding is done on-the-fly. The decoder outputs samples in the same format as -samples output by perf hardware events, for example as though the "instructions" -or "branches" events had been recorded. Presently 3 tools support this: -'perf script', 'perf report' and 'perf inject'. See below for more information -on using those tools. - -The main distinguishing feature of Intel PT is that the decoder can determine -the exact flow of software execution. Intel PT can be used to understand why -and how did software get to a certain point, or behave a certain way. The -software does not have to be recompiled, so Intel PT works with debug or release -builds, however the executed images are needed - which makes use in JIT-compiled -environments, or with self-modified code, a challenge. Also symbols need to be -provided to make sense of addresses. - -A limitation of Intel PT is that it produces huge amounts of trace data -(hundreds of megabytes per second per core) which takes a long time to decode, -for example two or three orders of magnitude longer than it took to collect. -Another limitation is the performance impact of tracing, something that will -vary depending on the use-case and architecture. - - -Quickstart -========== - -It is important to start small. That is because it is easy to capture vastly -more data than can possibly be processed. - -The simplest thing to do with Intel PT is userspace profiling of small programs. -Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: - - perf record -e intel_pt//u ls - -And profiled with 'perf report' e.g. - - perf report - -To also trace kernel space presents a problem, namely kernel self-modifying -code. 
A fairly good kernel image is available in /proc/kcore but to get an -accurate image a copy of /proc/kcore needs to be made under the same conditions -as the data capture. A script perf-with-kcore can do that, but beware that the -script makes use of 'sudo' to copy /proc/kcore. If you have perf installed -locally from the source tree you can do: - - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls - -which will create a directory named 'pt_ls' and put the perf.data file and -copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use -'perf report' becomes: - - ~/libexec/perf-core/perf-with-kcore report pt_ls - -Because samples are synthesized after-the-fact, the sampling period can be -selected for reporting. e.g. sample every microsecond - - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge - -See the sections below for more information about the --itrace option. - -Beware the smaller the period, the more samples that are produced, and the -longer it takes to process them. - -Also note that the coarseness of Intel PT timing information will start to -distort the statistical value of the sampling as the sampling period becomes -smaller. - -To represent software control flow, "branches" samples are produced. By default -a branch sample is synthesized for every single branch. To get an idea what -data is available you can use the 'perf script' tool with all itrace sampling -options, which will list all the samples. - - perf record -e intel_pt//u ls - perf script --itrace=ibxwpe - -An interesting field that is not printed by default is 'flags' which can be -displayed as follows: - - perf script --itrace=ibxwpe -F+flags - -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. - -Another interesting field that is not printed by default is 'ipc' which can be -displayed as follows: - - perf script --itrace=be -F+ipc - -There are two ways that instructions-per-cycle (IPC) can be calculated depending -on the recording. - -If the 'cyc' config term (see config terms section below) was used, then IPC is -calculated using the cycle count from CYC packets, otherwise MTC packets are -used - refer to the 'mtc' config term. When MTC is used, however, the values -are less accurate because the timing is less accurate. - -Because Intel PT does not update the cycle count on every branch or instruction, -the values will often be zero. When there are values, they will be the number -of instructions and number of cycles since the last update, and thus represent -the average IPC since the last IPC for that event type. Note IPC for "branches" -events is calculated separately from IPC for "instructions" events. - -Also note that the IPC instruction count may or may not include the current -instruction. If the cycle count is associated with an asynchronous branch -(e.g. page fault or interrupt), then the instruction count does not include the -current instruction, otherwise it does. That is consistent with whether or not -that instruction has retired when the cycle count is updated. - -Another note, in the case of "branches" events, non-taken branches are not -presently sampled, so IPC values for them do not appear e.g. a CYC packet with a -TNT packet that starts with a non-taken branch. To see every possible IPC -value, "instructions" events can be used e.g. 
--itrace=i0ns - -While it is possible to create scripts to analyze the data, an alternative -approach is available to export the data to a sqlite or postgresql database. -Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script exported-sql-viewer.py for an example of using the database. - -There is also script intel-pt-events.py which provides an example of how to -unpack the raw data for power events and PTWRITE. - -As mentioned above, it is easy to capture too much data. One way to limit the -data captured is to use 'snapshot' mode which is explained further below. -Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. - -Another problem that will be experienced is decoder errors. They can be caused -by inability to access the executed image, self-modified or JIT-ed code, or the -inability to match side-band information (such as context switches and mmaps) -which results in the decoder not knowing what code was executed. - -There is also the problem of perf not being able to copy the data fast enough, -resulting in data lost because the buffer was full. See 'Buffer handling' below -for more details. - - -perf record -=========== - -new event ---------- - -The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are -selected by providing the PMU name followed by the "config" separated by slashes. -An enhancement has been made to allow default "config" e.g. the option - - -e intel_pt// - -will use a default config value. Currently that is the same as - - -e intel_pt/tsc,noretcomp=0/ - -which is the same as - - -e intel_pt/tsc=1,noretcomp=0/ - -Note there are now new config terms - see section 'config terms' further below. - -The config terms are listed in /sys/devices/intel_pt/format. They are bit -fields within the config member of the struct perf_event_attr which is -passed to the kernel by the perf_event_open system call. They correspond to bit -fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: - - $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* - /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 - /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 - /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 - /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 - /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 - /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 - /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 - -Note that the default config must be overridden for each term i.e. - - -e intel_pt/noretcomp=0/ - -is the same as: - - -e intel_pt/tsc=1,noretcomp=0/ - -So, to disable TSC packets use: - - -e intel_pt/tsc=0/ - -It is also possible to specify the config value explicitly: - - -e intel_pt/config=0x400/ - -Note that, as with all events, the event is suffixed with event modifiers: - - u userspace - k kernel - h hypervisor - G guest - H host - p precise ip - -'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. -'p' is also not relevant to Intel PT. So only options 'u' and 'k' are -meaningful for Intel PT. - -perf_event_attr is displayed if the -vv option is used e.g. 
- - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - - -config terms ------------- - -The June 2015 version of Intel 64 and IA-32 Architectures Software Developer -Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. -Some of the features are reflect in new config terms. All the config terms are -described below. - -tsc Always supported. Produces TSC timestamp packets to provide - timing information. In some cases it is possible to decode - without timing information, for example a per-thread context - that does not overlap executable memory maps. - - The default config selects tsc (i.e. tsc=1). - -noretcomp Always supported. Disables "return compression" so a TIP packet - is produced when a function returns. Causes more packets to be - produced but might make decoding more reliable. - - The default config does not select noretcomp (i.e. noretcomp=0). - -psb_period Allows the frequency of PSB packets to be specified. - - The PSB packet is a synchronization packet that provides a - starting point for decoding or recovery from errors. - - Support for psb_period is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and "0" - otherwise. - - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The psb_period value is converted to the approximate number of - trace bytes between PSB packets as: - - 2 ^ (value + 11) - - e.g. value 3 means 16KiB bytes between PSBs - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/psb_period=15/u uname - Invalid psb_period for intel_pt. Valid values are: 0-5 - - If MTC packets are selected, the default config selects a value - of 3 (i.e. psb_period=3) or the nearest lower value that is - supported (0 is always supported). Otherwise the default is 0. - - If decoding is expected to be reliable and the buffer is large - then a large PSB period can be used. - - Because a TSC packet is produced with PSB, the PSB period can - also affect the granularity to timing information in the absence - of MTC or CYC. - -mtc Produces MTC timing packets. - - MTC packets provide finer grain timestamp information than TSC - packets. MTC packets record time using the hardware crystal - clock (CTC) which is related to TSC packets using a TMA packet. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc - - which contains "1" if the feature is supported and - "0" otherwise. - - The frequency of MTC packets can also be specified - see - mtc_period below. - -mtc_period Specifies how frequently MTC packets are produced - see mtc - above for how to determine if MTC packets are supported. 
- - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The mtc_period value is converted to the MTC frequency as: - - CTC-frequency / (2 ^ value) - - e.g. value 3 means one eighth of CTC-frequency - - Where CTC is the hardware crystal clock, the frequency of which - can be related to TSC via values provided in cpuid leaf 0x15. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/mtc_period=15/u uname - Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 - - The default value is 3 or the nearest lower value - that is supported (0 is always supported). - -cyc Produces CYC timing packets. - - CYC packets provide even finer grain timestamp information than - MTC and TSC packets. A CYC packet contains the number of CPU - cycles since the last CYC packet. Unlike MTC and TSC packets, - CYC packets are only sent when another packet is also sent. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and - "0" otherwise. - - The number of CYC packets produced can be reduced by specifying - a threshold - see cyc_thresh below. - -cyc_thresh Specifies how frequently CYC packets are produced - see cyc - above for how to determine if CYC packets are supported. - - Valid cyc_thresh values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The cyc_thresh value represents the minimum number of CPU cycles - that must have passed before a CYC packet can be sent. The - number of CPU cycles is: - - 2 ^ (value - 1) - - e.g. value 4 means 8 CPU cycles must pass before a CYC packet - can be sent. Note a CYC packet is still only sent when another - packet is sent, not at, e.g. every 8 CPU cycles. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname - Invalid cyc_thresh for intel_pt. Valid values are: 0-12 - - CYC packets are not requested by default. - -pt Specifies pass-through which enables the 'branch' config term. - - The default config selects 'pt' if it is available, so a user will - never need to specify this term. - -branch Enable branch tracing. Branch tracing is enabled by default so to - disable branch tracing use 'branch=0'. - - The default config selects 'branch' if it is available. - -ptw Enable PTWRITE packets which are produced when a ptwrite instruction - is executed. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/ptwrite - - which contains "1" if the feature is supported and - "0" otherwise. - -fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet - provides the address of the ptwrite instruction. In the absence of - fup_on_ptw, the decoder will use the address of the previous branch - if branch tracing is enabled, otherwise the address will be zero. - Note that fup_on_ptw will work even when branch tracing is disabled. - -pwr_evt Enable power events. The power events provide information about - changes to the CPU C-state. 
- - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/power_event_trace - - which contains "1" if the feature is supported and - "0" otherwise. - - -AUX area sampling option ------------------------- - -To select Intel PT "sampling" the AUX area sampling option can be used: - - --aux-sample - -Optionally it can be followed by the sample size in bytes e.g. - - --aux-sample=8192 - -In addition, the Intel PT event to sample must be defined e.g. - - -e intel_pt//u - -Samples on other events will be created containing Intel PT data e.g. the -following will create Intel PT samples on the branch-misses event, note the -events must be grouped using {}: - - perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' - -An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to -events. In this case, the grouping is implied e.g. - - perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u - -is the same as: - - perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' - -but allows for also using an address filter e.g.: - - perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls - -It is important to select a sample size that is big enough to contain at least -one PSB packet. If not a warning will be displayed: - - Intel PT sample size (%zu) may be too small for PSB period (%zu) - -The calculation used for that is: if sample_size <= psb_period + 256 display the -warning. When sampling is used, psb_period defaults to 0 (2KiB). - -The default sample size is 4KiB. - -The sample size is passed in aux_sample_size in struct perf_event_attr. The -sample size is limited by the maximum event size which is 64KiB. It is -difficult to know how big the event might be without the trace sample attached, -but the tool validates that the sample size is not greater than 60KiB. - - -new snapshot option -------------------- - -The difference between full trace and snapshot from the kernel's perspective is -that in full trace we don't overwrite trace data that the user hasn't collected -yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let -the trace run and overwrite older data in the buffer so that whenever something -interesting happens, we can stop it and grab a snapshot of what was going on -around that interesting moment. - -To select snapshot mode a new option has been added: - - -S - -Optionally it can be followed by the snapshot size e.g. - - -S0x100000 - -The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size -nor snapshot size is specified, then the default is 4MiB for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. -If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced as described in the 'new auxtrace mmap size option' section below. - -The snapshot size is displayed if the option -vv is used e.g. - - Intel PT snapshot size: %zu - - -new auxtrace mmap size option ---------------------------- - -Intel PT buffer size is specified by an addition to the -m option e.g. - - -m,16 - -selects a buffer size of 16 pages i.e. 64KiB. - -Note that the existing functionality of -m is unchanged. The auxtrace mmap size -is specified by the optional addition of a comma and the value. - -The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 
-If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the -user is likely to get an error as they exceed their mlock limit (Max locked -memory as shown in /proc/self/limits). Note that perf does not count the first -512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu -against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus -their mlock limit (which defaults to 64KiB but is not multiplied by the number -of cpus). - -In full-trace mode, powers of two are allowed for buffer size, with a minimum -size of 2 pages. In snapshot mode or sampling mode, it is the same but the -minimum size is 1 page. - -The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. - - mmap length 528384 - auxtrace mmap length 4198400 - - -Intel PT modes of operation ---------------------------- - -Intel PT can be used in 2 modes: - full-trace mode - sample mode - snapshot mode - -Full-trace mode traces continuously e.g. - - perf record -e intel_pt//u uname - -Sample mode attaches a Intel PT sample to other events e.g. - - perf record --aux-sample -e intel_pt//u -e branch-misses:u - -Snapshot mode captures the available data when a signal is sent e.g. - - perf record -v -e intel_pt//u -S ./loopy 1000000000 & - [1] 11435 - kill -USR2 11435 - Recording AUX area tracing snapshot - -Note that the signal sent is SIGUSR2. -Note that "Recording AUX area tracing snapshot" is displayed because the -v -option is used. - -The 2 modes cannot be used together. - - -Buffer handling ---------------- - -There may be buffer limitations (i.e. single ToPa entry) which means that actual -buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to -provide other sizes, and in particular an arbitrarily large size, multiple -buffers are logically concatenated. However an interrupt must be used to switch -between buffers. That has two potential problems: - a) the interrupt may not be handled in time so that the current buffer - becomes full and some trace data is lost. - b) the interrupts may slow the system and affect the performance - results. - -If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event -which the tools report as an error. - -In full-trace mode, the driver waits for data to be copied out before allowing -the (logical) buffer to wrap-around. If data is not copied out quickly enough, -again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to -wait, the intel_pt event gets disabled. Because it is difficult to know when -that happens, perf tools always re-enable the intel_pt event after copying out -data. - - -Intel PT and build ids ----------------------- - -By default "perf record" post-processes the event stream to find all build ids -for executables for all addresses sampled. Deliberately, Intel PT is not -decoded for that purpose (it would take too long). Instead the build ids for -all executables encountered (due to mmap, comm or task events) are included -in the perf.data file. - -To see buildids included in the perf.data file use the command: - - perf buildid-list - -If the perf.data file contains Intel PT data, that is the same as: - - perf buildid-list --with-hits - - -Snapshot mode and event disabling ---------------------------------- - -In order to make a snapshot, the intel_pt event is disabled using an IOCTL, -namely PERF_EVENT_IOC_DISABLE. 
However doing that can also disable the -collection of side-band information. In order to prevent that, a dummy -software event has been introduced that permits tracking events (like mmaps) to -continue to be recorded while intel_pt is disabled. That is important to ensure -there is complete side-band information to allow the decoding of subsequent -snapshots. - -A test has been created for that. To find the test: - - perf test list - ... - 23: Test using a dummy software event to keep tracking - -To run the test: - - perf test 23 - 23: Test using a dummy software event to keep tracking : Ok - - -perf record modes (nothing new here) ------------------------------------- - -perf record essentially operates in one of three modes: - per thread - per cpu - workload only - -"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a -workload). -"per cpu" is selected by -C or -a. -"workload only" mode is selected by not using the other options but providing a -command to run (i.e. the workload). - -In per-thread mode an exact list of threads is traced. There is no inheritance. -Each thread has its own event buffer. - -In per-cpu mode all processes (or processes from the selected cgroup i.e. -G -option, or processes selected with -p or -u) are traced. Each cpu has its own -buffer. Inheritance is allowed. - -In workload-only mode, the workload is traced but with per-cpu buffers. -Inheritance is allowed. Note that you can now trace a workload in per-thread -mode by using the --per-thread option. - - -Privileged vs non-privileged users ----------------------------------- - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users -have memory limits imposed upon them. That affects what buffer sizes they can -have as outlined above. - -The v4.2 kernel introduced support for a context switch metadata event, -PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes -are scheduled out and in, just not by whom, which is left for the -PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, -which in turn requires CAP_SYS_ADMIN. - -Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context -switches") commit, that introduces these metadata events for further info. - -When working with kernels < v4.2, the following considerations must be taken, -as the sched:sched_switch tracepoints will be used to receive such information: - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are -not permitted to use tracepoints which means there is insufficient side-band -information to decode Intel PT in per-cpu mode, and potentially workload-only -mode too if the workload creates new processes. - -Note also, that to use tracepoints, read-access to debugfs is required. So if -debugfs is not mounted or the user does not have read-access, it will again not -be possible to decode Intel PT in per-cpu mode. - - -sched_switch tracepoint ------------------------ - -The sched_switch tracepoint is used to provide side-band data for Intel PT -decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't -available. - -The sched_switch events are automatically added. e.g. 
the second event shown -below: - - $ perf record -vv -e intel_pt//u uname - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 2 - size 112 - config 0x108 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER - read_format ID - inherit 1 - sample_id_all 1 - exclude_guest 1 - ------------------------------------------------------------ - sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 1 - size 112 - config 0x9 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - mmap 1 - comm 1 - enable_on_exec 1 - task 1 - sample_id_all 1 - mmap2 1 - comm_exec 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - mmap size 528384B - AUX area mmap length 4194304 - perf event ring buffer mmapped per cpu - Synthesizing auxtrace information - Linux - [ perf record: Woken up 1 times to write data ] - [ perf record: Captured and wrote 0.042 MB perf.data ] - -Note, the sched_switch event is only added if the user is permitted to use it -and only in per-cpu mode. - -Note also, the sched_switch event is only added if TSC packets are requested. -That is because, in the absence of timing information, the sched_switch events -cannot be matched against the Intel PT trace. - - -perf script -=========== - -By default, perf script will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace. - - -New --itrace option -------------------- - -Having no option is the same as - - --itrace - -which, in turn, is the same as - - --itrace=cepwx - -The letters are: - - i synthesize "instructions" events - b synthesize "branches" events - x synthesize "transactions" events - w synthesize "ptwrite" events - p synthesize "power" events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - e synthesize tracing error events - d create a debug log - g synthesize a call chain (use with i or x) - l synthesize last branch entries (use with i or x) - s skip initial number of events - -"Instructions" events look like they were recorded by "perf record -e -instructions". - -"Branches" events look like they were recorded by "perf record -e branches". "c" -and "r" can be combined to get calls and returns. - -"Transactions" events correspond to the start or end of transactions. 
The -'flags' field can be used in perf script to determine whether the event is a -tranasaction start, commit or abort. - -Note that "instructions", "branches" and "transactions" events depend on code -flow packets which can be disabled by using the config term "branch=0". Refer -to the config terms section above. - -"ptwrite" events record the payload of the ptwrite instruction and whether -"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are -recorded only if the "ptw" config term was used. Refer to the config terms -section above. perf script "synth" field displays "ptwrite" information like -this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was -used. - -"Power" events correspond to power event packets and CBR (core-to-bus ratio) -packets. While CBR packets are always recorded when tracing is enabled, power -event packets are recorded only if the "pwr_evt" config term was used. Refer to -the config terms section above. The power events record information about -C-state changes, whereas CBR is indicative of CPU frequency. perf script -"event,synth" fields display information like this: - cbr: cbr: 22 freq: 2189 MHz (200%) - mwait: hints: 0x60 extensions: 0x1 - pwre: hw: 0 cstate: 2 sub-cstate: 0 - exstop: ip: 1 - pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 -Where: - "cbr" includes the frequency and the percentage of maximum non-turbo - "mwait" shows mwait hints and extensions - "pwre" shows C-state transitions (to a C-state deeper than C0) and - whether initiated by hardware - "exstop" indicates execution stopped and whether the IP was recorded - exactly, - "pwrx" indicates return to C0 -For more details refer to the Intel 64 and IA-32 Architectures Software -Developer Manuals. - -Error events show where the decoder lost the trace. Error events -are quite important. Users must know if what they are seeing is a complete -picture or not. - -The "d" option will cause the creation of a file "intel_pt.log" containing all -decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. - -In addition, the period of the "instructions" event can be specified. e.g. - - --itrace=i10us - -sets the period to 10us i.e. one instruction sample is synthesized for each 10 -microseconds of trace. Alternatives to "us" are "ms" (milliseconds), -"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). - -"ms", "us" and "ns" are converted to TSC ticks. - -The timing information included with Intel PT does not give the time of every -instruction. Consequently, for the purpose of sampling, the decoder estimates -the time since the last timing packet based on 1 tick per instruction. The time -on the sample is *not* adjusted and reflects the last known value of TSC. - -For Intel PT, the default period is 100us. - -Setting it to a zero period means "as often as possible". - -In the case of Intel PT that is the same as a period of 1 and a unit of -'instructions' (i.e. --itrace=i1i). - -Also the call chain size (default 16, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=ig32 - --itrace=xg32 - -Also the number of last branch entries (default 64, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=il10 - --itrace=xl10 - -Note that last branch entries are cleared for each sample, so there is no overlap -from one sample to the next. - -To disable trace decoding entirely, use the option --no-itrace. 
- -It is also possible to skip events generated (instructions, branches, transactions) -at the beginning. This is useful to ignore initialization code. - - --itrace=i0nss1000000 - -skips the first million instructions. - -dump option ------------ - -perf script has an option (-D) to "dump" the events i.e. display the binary -data. - -When -D is used, Intel PT packets are displayed. The packet decoder does not -pay attention to PSB packets, but just decodes the bytes - so the packets seen -by the actual decoder may not be identical in places where the data is corrupt. -One example of that would be when the buffer-switching interrupt has been too -slow, and the buffer has been filled completely. In that case, the last packet -in the buffer might be truncated and immediately followed by a PSB as the trace -continues in the next buffer. - -To disable the display of Intel PT packets, combine the -D option with ---no-itrace. - - -perf report -=========== - -By default, perf report will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace exactly the same as -perf script, with the exception that the default is --itrace=igxe. - - -perf inject -=========== - -perf inject also accepts the --itrace option in which case tracing data is -removed and replaced with the synthesized events. e.g. - - perf inject --itrace -i perf.data -o perf.data.new - -Below is an example of using Intel PT with autofdo. It requires autofdo -(https://github.com/google/autofdo) and gcc version 5. The bubble -sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) -amended to take the number of elements as a parameter. - - $ gcc-5 -O3 sort.c -o sort_optimized - $ ./sort_optimized 30000 - Bubble sorting array of 30000 elements - 2254 ms - - $ cat ~/.perfconfig - [intel-pt] - mispred-all = on - - $ perf record -e intel_pt//u ./sort 3000 - Bubble sorting array of 3000 elements - 58 ms - [ perf record: Woken up 2 times to write data ] - [ perf record: Captured and wrote 3.939 MB perf.data ] - $ perf inject -i perf.data -o inj --itrace=i100usle --strip - $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 - $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo - $ ./sort_autofdo 30000 - Bubble sorting array of 30000 elements - 2155 ms - -Note there is currently no advantage to using Intel PT instead of LBR, but -that may change in the future if greater use is made of the data. - - -PEBS via Intel PT -================= - -Some hardware has the feature to redirect PEBS records to the Intel PT trace. -Recording is selected by using the aux-output config term e.g. - - perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname - -Note that currently, software only supports redirecting at most one PEBS event. - -To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. 
-
-	perf script --itrace=oe
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
new file mode 100644
index 000000000000..7d743a98a9c1
--- /dev/null
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -0,0 +1,1000 @@
+perf-intel-pt(1)
+================
+
+NAME
+----
+perf-intel-pt - Support for Intel Processor Trace within perf tools
+
+SYNOPSIS
+--------
+[verse]
+'perf record' -e intel_pt//
+
+DESCRIPTION
+-----------
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly. The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded. Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'. See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution. Intel PT can be used to understand why
+and how software got to a certain point, or behaved a certain way. The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge. Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+----------
+
+It is important to start small. That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+	perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+	perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code. A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture. A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore.
If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with all itrace sampling +options, which will list all the samples. + + perf record -e intel_pt//u ls + perf script --itrace=ibxwpe + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script --itrace=ibxwpe -F+flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +Another interesting field that is not printed by default is 'ipc' which can be +displayed as follows: + + perf script --itrace=be -F+ipc + +There are two ways that instructions-per-cycle (IPC) can be calculated depending +on the recording. + +If the 'cyc' config term (see config terms section below) was used, then IPC is +calculated using the cycle count from CYC packets, otherwise MTC packets are +used - refer to the 'mtc' config term. When MTC is used, however, the values +are less accurate because the timing is less accurate. + +Because Intel PT does not update the cycle count on every branch or instruction, +the values will often be zero. When there are values, they will be the number +of instructions and number of cycles since the last update, and thus represent +the average IPC since the last IPC for that event type. Note IPC for "branches" +events is calculated separately from IPC for "instructions" events. + +Also note that the IPC instruction count may or may not include the current +instruction. If the cycle count is associated with an asynchronous branch +(e.g. page fault or interrupt), then the instruction count does not include the +current instruction, otherwise it does. That is consistent with whether or not +that instruction has retired when the cycle count is updated. + +Another note, in the case of "branches" events, non-taken branches are not +presently sampled, so IPC values for them do not appear e.g. a CYC packet with a +TNT packet that starts with a non-taken branch. To see every possible IPC +value, "instructions" events can be used e.g. --itrace=i0ns + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a sqlite or postgresql database. +Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, +and to script exported-sql-viewer.py for an example of using the database. 
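+
+For example, a minimal SQLite round trip might look like this (illustrative
+only - the script locations vary by installation, so check the scripts
+themselves for exact usage):
+
+	perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt.db branches calls
+	python ~/libexec/perf-core/scripts/python/exported-sql-viewer.py pt.db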
+ +There is also script intel-pt-events.py which provides an example of how to +unpack the raw data for power events and PTWRITE. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. + +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +----------- + +new event +~~~~~~~~~ + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +Note there are now new config terms - see section 'config terms' further below. + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 + +Note that the default config must be overridden for each term i.e. + + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. 
+
+  ------------------------------------------------------------
+  perf_event_attr:
+    type                             6
+    size                             112
+    config                           0x400
+    { sample_period, sample_freq }   1
+    sample_type                      IP|TID|TIME|CPU|IDENTIFIER
+    read_format                      ID
+    disabled                         1
+    inherit                          1
+    exclude_kernel                   1
+    exclude_hv                       1
+    enable_on_exec                   1
+    sample_id_all                    1
+  ------------------------------------------------------------
+  sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+  sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+  sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+  sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+  ------------------------------------------------------------
+
+
+config terms
+~~~~~~~~~~~~
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflected in new config terms. All the config terms
+are described below.
+
+tsc		Always supported. Produces TSC timestamp packets to provide
+		timing information. In some cases it is possible to decode
+		without timing information, for example a per-thread context
+		that does not overlap executable memory maps.
+
+		The default config selects tsc (i.e. tsc=1).
+
+noretcomp	Always supported. Disables "return compression" so a TIP packet
+		is produced when a function returns. Causes more packets to be
+		produced but might make decoding more reliable.
+
+		The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period	Allows the frequency of PSB packets to be specified.
+
+		The PSB packet is a synchronization packet that provides a
+		starting point for decoding or recovery from errors.
+
+		Support for psb_period is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+		which contains "1" if the feature is supported and "0"
+		otherwise.
+
+		Valid values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The psb_period value is converted to the approximate number of
+		trace bytes between PSB packets as:
+
+			2 ^ (value + 11)
+
+		e.g. value 3 means 16KiB between PSBs
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/psb_period=15/u uname
+			Invalid psb_period for intel_pt. Valid values are: 0-5
+
+		If MTC packets are selected, the default config selects a value
+		of 3 (i.e. psb_period=3) or the nearest lower value that is
+		supported (0 is always supported). Otherwise the default is 0.
+
+		If decoding is expected to be reliable and the buffer is large
+		then a large PSB period can be used.
+
+		Because a TSC packet is produced with PSB, the PSB period can
+		also affect the granularity of timing information in the
+		absence of MTC or CYC.
+
+mtc		Produces MTC timing packets.
+
+		MTC packets provide finer grain timestamp information than TSC
+		packets. MTC packets record time using the hardware crystal
+		clock (CTC) which is related to TSC packets using a TMA packet.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/mtc
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+		The frequency of MTC packets can also be specified - see
+		mtc_period below.
+
+mtc_period	Specifies how frequently MTC packets are produced - see mtc
+		above for how to determine if MTC packets are supported.
+ + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +pt Specifies pass-through which enables the 'branch' config term. + + The default config selects 'pt' if it is available, so a user will + never need to specify this term. + +branch Enable branch tracing. Branch tracing is enabled by default so to + disable branch tracing use 'branch=0'. + + The default config selects 'branch' if it is available. + +ptw Enable PTWRITE packets which are produced when a ptwrite instruction + is executed. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/ptwrite + + which contains "1" if the feature is supported and + "0" otherwise. + +fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet + provides the address of the ptwrite instruction. In the absence of + fup_on_ptw, the decoder will use the address of the previous branch + if branch tracing is enabled, otherwise the address will be zero. + Note that fup_on_ptw will work even when branch tracing is disabled. + +pwr_evt Enable power events. The power events provide information about + changes to the CPU C-state. 
+ + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace + + which contains "1" if the feature is supported and + "0" otherwise. + + +AUX area sampling option +~~~~~~~~~~~~~~~~~~~~~~~~ + +To select Intel PT "sampling" the AUX area sampling option can be used: + + --aux-sample + +Optionally it can be followed by the sample size in bytes e.g. + + --aux-sample=8192 + +In addition, the Intel PT event to sample must be defined e.g. + + -e intel_pt//u + +Samples on other events will be created containing Intel PT data e.g. the +following will create Intel PT samples on the branch-misses event, note the +events must be grouped using {}: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' + +An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to +events. In this case, the grouping is implied e.g. + + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u + +is the same as: + + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' + +but allows for also using an address filter e.g.: + + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls + +It is important to select a sample size that is big enough to contain at least +one PSB packet. If not a warning will be displayed: + + Intel PT sample size (%zu) may be too small for PSB period (%zu) + +The calculation used for that is: if sample_size <= psb_period + 256 display the +warning. When sampling is used, psb_period defaults to 0 (2KiB). + +The default sample size is 4KiB. + +The sample size is passed in aux_sample_size in struct perf_event_attr. The +sample size is limited by the maximum event size which is 64KiB. It is +difficult to know how big the event might be without the trace sample attached, +but the tool validates that the sample size is not greater than 60KiB. + + +new snapshot option +~~~~~~~~~~~~~~~~~~~ + +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits). Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages. In snapshot mode or sampling mode, it is the same but the
+minimum size is 1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+	mmap length 528384
+	auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT can be used in 3 modes:
+	full-trace mode
+	sample mode
+	snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+	perf record -e intel_pt//u uname
+
+Sample mode attaches an Intel PT sample to other events e.g.
+
+	perf record --aux-sample -e intel_pt//u -e branch-misses:u
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+	perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+	[1] 11435
+	kill -USR2 11435
+	Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The modes cannot be used together.
+
+
+Buffer handling
+~~~~~~~~~~~~~~~
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated. However an interrupt must be used to switch
+between buffers. That has two potential problems:
+	a) the interrupt may not be handled in time so that the current buffer
+	becomes full and some trace data is lost.
+	b) the interrupts may slow the system and affect the performance
+	results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around. If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
+wait, the intel_pt event gets disabled. Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+~~~~~~~~~~~~~~~~~~~~~~
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled. Deliberately, Intel PT is not
+decoded for that purpose (it would take too long). Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+	perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+	perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE.
However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +The v4.2 kernel introduced support for a context switch metadata event, +PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes +are scheduled out and in, just not by whom, which is left for the +PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, +which in turn requires CAP_SYS_ADMIN. + +Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context +switches") commit, that introduces these metadata events for further info. + +When working with kernels < v4.2, the following considerations must be taken, +as the sched:sched_switch tracepoints will be used to receive such information: + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +~~~~~~~~~~~~~~~~~~~~~~~ + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't +available. + +The sched_switch events are automatically added. e.g. 
the second event shown +below: + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +----------- + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +~~~~~~~~~~~~~~~~~~~ + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=cepwx + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + w synthesize "ptwrite" events + p synthesize "power" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) + s skip initial number of events + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. 
The
+'flags' field can be used in perf script to determine whether the event is a
+transaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0". Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used. Refer to the config terms
+section above. perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets. While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used. Refer to
+the config terms section above. The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency. perf script
+"event,synth" fields display information like this:
+	cbr:  cbr: 22 freq: 2189 MHz (200%)
+	mwait:  hints: 0x60 extensions: 0x1
+	pwre:  hw: 0 cstate: 2 sub-cstate: 0
+	exstop:  ip: 1
+	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+	"cbr" includes the frequency and the percentage of maximum non-turbo
+	"mwait" shows mwait hints and extensions
+	"pwre" shows C-state transitions (to a C-state deeper than C0) and
+	whether initiated by hardware
+	"exstop" indicates execution stopped and whether the IP was recorded
+	exactly,
+	"pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace. Error events
+are quite important. Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions. Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+	--itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction. Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction. The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+	--itrace=ig32
+	--itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+	--itrace=il10
+	--itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+To disable trace decoding entirely, use the option --no-itrace.
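+
+For example, combining some of the letters above (an illustrative combination,
+not an exhaustive one): one "instructions" sample every 100 microseconds, each
+with a call chain of up to 32 entries, plus error events:
+
+	--itrace=i100usg32e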
+ +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. + +dump option +~~~~~~~~~~~ + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +----------- + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +----------- + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all = on + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. + + +PEBS via Intel PT +----------------- + +Some hardware has the feature to redirect PEBS records to the Intel PT trace. +Recording is selected by using the aux-output config term e.g. + + perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname + +Note that currently, software only supports redirecting at most one PEBS event. + +To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. + + perf script --itrace=oe -- cgit v1.2.3 From 870d325b15fb31af031cec58b89c1fb099a94bc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Mar 2020 14:20:33 +0200 Subject: perf intel-pt: Add Intel PT man page references Add references to Intel PT man page in man pages of associated tools. 
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200311122034.3697-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 3 ++- tools/perf/Documentation/perf-intel-pt.txt | 7 +++++++ tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/Documentation/perf-report.txt | 3 ++- tools/perf/Documentation/perf-script.txt | 2 +- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index a64d6588470e..70969ea73e01 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -66,4 +66,5 @@ include::itrace.txt[] SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 7d743a98a9c1..456fdcbf26ac 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -998,3 +998,10 @@ Note that currently, software only supports redirecting at most one PEBS event. To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. perf script --itrace=oe + + +SEE ALSO +-------- + +linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1], +linkperf:perf-inject[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b23a4012a606..7f4db7592467 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -589,4 +589,4 @@ appended unit character - B/K/M/G SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-list[1] +linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index db61f16ffa56..bd0a029d4c08 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -546,4 +546,5 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2599b057e47b..db6a36aac47e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -429,4 +429,4 @@ include::itrace.txt[] SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], -linkperf:perf-script-python[1] +linkperf:perf-script-python[1], linkperf:perf-intel-pt[1] -- cgit v1.2.3 From ec2eab9deb8009fd8c69d61d04c66b77d438f17d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Mar 2020 14:20:34 +0200 Subject: perf intel-pt: Update intel-pt.txt file with new location of the documentation Make it easy for people looking in intel-pt.txt to find the new file. 
Signed-off-by: Adrian Hunter
Tested-by: Arnaldo Carvalho de Melo
Cc: Andi Kleen
Cc: Jiri Olsa
Link: http://lore.kernel.org/lkml/20200311122034.3697-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/intel-pt.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/perf/Documentation/intel-pt.txt

diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
new file mode 100644
index 000000000000..fd9241a1b987
--- /dev/null
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -0,0 +1 @@
+Documentation for support for Intel Processor Trace within perf tools has moved to file perf-intel-pt.txt
--
cgit v1.2.3


From 67439d555f7d46349deef56886129da96bd15744 Mon Sep 17 00:00:00 2001
From: Michael Petlan
Date: Wed, 11 Mar 2020 14:28:36 +0100
Subject: perf scripting perl: Add common_callchain to fix argument order

Since common_callchain has been added to the argument array, we need to
reflect it in perl-based scripts, because otherwise the following args
would be shifted and thus incorrect.

E.g. rw-by-pid and calculation of read and written bytes:

Before:

  read counts by pid:
     pid                  comm      # reads  bytes_requested  bytes_read
  ------  --------------------  -----------  ---------------  ----------
   19301                    dd            4  424510450039736           0

After:

  read counts by pid:
     pid                  comm      # reads  bytes_requested  bytes_read
  ------  --------------------  -----------  ---------------  ----------
   19301                    dd            4             9536        4341

Committer testing:

To see the before and after, first do:

  # perf script record rw-by-pid
  ^C

Now you'll have a perf.data file to report on, then do before and after
using:

  # perf script report rw-by-pid

And notice the bytes_requested/bytes_read columns, as above.

Signed-off-by: Michael Petlan
Tested-by: Arnaldo Carvalho de Melo
Cc: Benjamin Salon
Cc: Jiri Olsa
LPU-Reference: 20200311132836.12693-1-mpetlan@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/scripts/perl/check-perf-trace.pl |  6 +++---
 tools/perf/scripts/perl/failed-syscalls.pl  |  2 +-
 tools/perf/scripts/perl/rw-by-file.pl       |  6 +++---
 tools/perf/scripts/perl/rw-by-pid.pl        | 10 +++++-----
 tools/perf/scripts/perl/rwtop.pl            | 10 +++++-----
 tools/perf/scripts/perl/wakeup-latency.pl   |  6 +++---
 6 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7076c20616..d307ce8fd6ed 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -28,7 +28,7 @@ sub trace_end
 sub irq::softirq_entry
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $vec) = @_;

 	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
@@ -43,7 +43,7 @@ sub irq::softirq_entry
 sub kmem::kmalloc
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $call_site, $ptr, $bytes_req, $bytes_alloc,
 	    $gfp_flags) = @_;

@@ -92,7 +92,7 @@ sub print_unhandled
 sub trace_unhandled
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm) = @_;
+	    $common_pid, $common_comm, $common_callchain) = @_;

 	$unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
index 55e7ae4c5c88..05954a8f363a 100644
--- a/tools/perf/scripts/perl/failed-syscalls.pl
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -18,7 +18,7 @@ my
%failed_syscalls; sub raw_syscalls::sys_exit { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $id, $ret) = @_; if ($ret < 0) { diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl index 168fa5e94b44..92a750b8552b 100644 --- a/tools/perf/scripts/perl/rw-by-file.pl +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -28,7 +28,7 @@ my %writes; sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $reads{$fd}{bytes_requested} += $count; @@ -39,7 +39,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $writes{$fd}{bytes_written} += $count; @@ -98,7 +98,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl index 495698250b2f..d789fe39caab 100644 --- a/tools/perf/scripts/perl/rw-by-pid.pl +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -24,7 +24,7 @@ my %writes; sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret > 0) { @@ -40,7 +40,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $reads{$common_pid}{bytes_requested} += $count; @@ -51,7 +51,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_exit_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret <= 0) { @@ -62,7 +62,7 @@ sub syscalls::sys_exit_write sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $writes{$common_pid}{bytes_written} += $count; @@ -178,7 +178,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 6473442568a2..eba4df67af6b 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -35,7 +35,7 @@ if (!$interval) { sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; print_check(); @@ -53,7 +53,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - 
$common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $nr, $fd, $buf, $count) = @_;

 	print_check();
@@ -66,7 +66,7 @@ sub syscalls::sys_enter_read
 sub syscalls::sys_exit_write
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $nr, $ret) = @_;

 	print_check();
@@ -79,7 +79,7 @@ sub syscalls::sys_exit_write
 sub syscalls::sys_enter_write
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $nr, $fd, $buf, $count) = @_;

 	print_check();
@@ -197,7 +197,7 @@ sub print_unhandled
 sub trace_unhandled
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm) = @_;
+	    $common_pid, $common_comm, $common_callchain) = @_;

 	$unhandled{$event_name}++;
 }
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
index efcfec5e347a..53444ff4ec7f 100644
--- a/tools/perf/scripts/perl/wakeup-latency.pl
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -28,7 +28,7 @@ my $total_wakeups = 0;
 sub sched::sched_switch
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm,
 	    $next_pid, $next_prio) = @_;

@@ -51,7 +51,7 @@ sub sched::sched_switch
 sub sched::sched_wakeup
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm,
+	    $common_pid, $common_comm, $common_callchain,
 	    $comm, $pid, $prio, $success, $target_cpu) = @_;

 	$last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
@@ -101,7 +101,7 @@ sub print_unhandled
 sub trace_unhandled
 {
 	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
-	    $common_pid, $common_comm) = @_;
+	    $common_pid, $common_comm, $common_callchain) = @_;

 	$unhandled{$event_name}++;
 }
--
cgit v1.2.3


From 753039ef8b2f1078e5bff8cd42f80578bf6385b0 Mon Sep 17 00:00:00 2001
From: Kim Phillips
Date: Wed, 11 Mar 2020 14:14:51 -0500
Subject: x86/cpu/amd: Call init_amd_zn() on Family 19h processors too

Family 19h CPUs are Zen-based and still share most architectural
features with Family 17h CPUs, and therefore still need to call
init_amd_zn() e.g., to set the RECLAIM_DISTANCE override.

init_amd_zn() also sets X86_FEATURE_ZEN, which today is only used
in amd_set_core_ssb_state(), which isn't called on some late
model Family 17h CPUs, nor on any Family 19h CPUs:
X86_FEATURE_AMD_SSBD replaces X86_FEATURE_LS_CFG_SSBD on those
later model CPUs, where the SSBD mitigation is done via the
SPEC_CTRL MSR instead of the LS_CFG MSR.

Family 19h CPUs also don't have the erratum where the CPB feature
bit isn't set, but that code can stay unchanged and run safely
on Family 19h.
Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200311191451.13221-1-kim.phillips@amd.com --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/amd.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f3327cb56edf..f980efcacfeb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -217,7 +217,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ac83a0fef628..dc6894a3f22d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -925,7 +925,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: init_amd_zn(c); break; + case 0x17: fallthrough; + case 0x19: init_amd_zn(c); break; } /* -- cgit v1.2.3 From 44d462acc0bf3eabe1522471fd1f683d8ce612cb Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Thu, 12 Mar 2020 15:21:45 +0300 Subject: perf record: Fix binding of AIO user space buffers to nodes Correct maxnode parameter value passed to mbind() syscall to be the amount of node mask bits to analyze plus 1. Dynamically allocate node mask memory depending on the index of node of cpu being profiled. 
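
For illustration only (not from this patch): a stand-alone userspace sketch
of the same maxnode convention. The bind_to_node() helper and BITS_PER_ULONG
macro are invented for the example; only mbind(2) itself is real.

  #include <numaif.h>     /* mbind(), MPOL_BIND */
  #include <stdlib.h>

  #define BITS_PER_ULONG (8 * sizeof(unsigned long))

  static int bind_to_node(void *buf, size_t len, unsigned long node)
  {
          unsigned long nr_bits = node + 1;  /* mask covers bits 0..node */
          size_t nr_words = (nr_bits + BITS_PER_ULONG - 1) / BITS_PER_ULONG;
          unsigned long *mask = calloc(nr_words, sizeof(*mask));
          int err;

          if (!mask)
                  return -1;
          mask[node / BITS_PER_ULONG] |= 1UL << (node % BITS_PER_ULONG);
          /* the kernel analyzes maxnode - 1 bits, hence the extra + 1 */
          err = mbind(buf, len, MPOL_BIND, mask, nr_bits + 1, 0) ? -1 : 0;
          free(mask);
          return err;
  }
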
Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes") Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/c7ea8ffe-1357-bf9e-3a89-1da1d8e9b75b@linux.intel.com [ Remove leftover nr_bits + 1 comment in mbind() call ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 3b664fa673a6..ab7108d22428 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; - unsigned long node_mask; + unsigned long *node_mask; + unsigned long node_index; + int err = 0; if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; mmap_len = mmap__mmap_len(map); - node_mask = 1UL << cpu__get_node(cpu); - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", - data, data + mmap_len, cpu__get_node(cpu)); + node_index = cpu__get_node(cpu); + node_mask = bitmap_alloc(node_index + 1); + if (!node_mask) { + pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; } + set_bit(node_index, node_mask); + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { + pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", + data, data + mmap_len, node_index); + err = -1; + } + bitmap_free(node_mask); } - return 0; + return err; } #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) -- cgit v1.2.3 From b2bf6660709c3bdd92d7558f4aa3cf07c0b0dda8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 12 Mar 2020 17:56:02 -0700 Subject: perf test: Print if shell directory isn't present If the shell test directory isn't present the exit code will be 255 but with no error messages printed. Add an error message. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200313005602.45236-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5f05db75cdd8..54d9516c9839 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -543,8 +543,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) return -1; dir = opendir(st.dir); - if (!dir) + if (!dir) { + pr_err("failed to open shell test directory: %s\n", + st.dir); return -1; + } for_each_shell_test(dir, st.dir, ent) { int curr = i++; -- cgit v1.2.3 From 19d33357ecdf6f4d591b4c17f119bacd6ae834eb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 16 Mar 2020 13:23:21 +0100 Subject: x86/amd_nb, char/amd64-agp: Use amd_nb_num() accessor ... to find whether there are northbridges present on the system. Convert the last forgotten user and therefore, unexport amd_nb_misc_ids[] too. 
Signed-off-by: Borislav Petkov Cc: Michal Kubecek Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20200316150725.925-1-bp@alien8.de --- arch/x86/include/asm/amd_nb.h | 1 - arch/x86/kernel/amd_nb.c | 4 +--- drivers/char/agp/amd64-agp.c | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 1ae4e5791afa..c7df20e78b09 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -12,7 +12,6 @@ struct amd_nb_bus_dev_range { u8 dev_limit; }; -extern const struct pci_device_id amd_nb_misc_ids[]; extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; extern bool early_is_amd_nb(u32 value); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 69aed0ebbdfc..b6b3297851f3 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -36,10 +36,9 @@ static const struct pci_device_id amd_root_ids[] = { {} }; - #define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 -const struct pci_device_id amd_nb_misc_ids[] = { +static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, @@ -56,7 +55,6 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, {} }; -EXPORT_SYMBOL_GPL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 594aee281977..b40edae32817 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -775,7 +775,7 @@ int __init agp_amd64_init(void) } /* First check that we have at least one AMD64 NB */ - if (!pci_dev_present(amd_nb_misc_ids)) { + if (!amd_nb_num()) { pci_unregister_driver(&agp_amd64_pci_driver); return -ENODEV; } -- cgit v1.2.3 From 4dcc3df82573a946c620dda5fb00e27c7b080105 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 13 Mar 2020 18:10:22 -0500 Subject: perf/amd/uncore: Prepare L3 thread mask code for Family 19h In order to better accommodate the upcoming Family 19h, given the 80-char line limit, move the existing code into a new l3_thread_slice_mask() function. No functional changes. [ bp: Touchups. 
] Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200313231024.17601-1-kim.phillips@amd.com --- arch/x86/events/amd/uncore.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa84..2abcb1abd07c 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -180,6 +180,20 @@ static void amd_uncore_del(struct perf_event *event, int flags) hwc->idx = -1; } +/* + * Convert logical CPU number to L3 PMC Config ThreadMask format + */ +static u64 l3_thread_slice_mask(int cpu) +{ + int thread = 2 * (cpu_data(cpu).cpu_core_id % 4); + + if (smp_num_siblings > 1) + thread += cpu_data(cpu).apicid & 1; + + return (1ULL << (AMD64_L3_THREAD_SHIFT + thread) & + AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK; +} + static int amd_uncore_event_init(struct perf_event *event) { struct amd_uncore *uncore; @@ -209,15 +223,8 @@ static int amd_uncore_event_init(struct perf_event *event) * SliceMask and ThreadMask need to be set for certain L3 events in * Family 17h. For other events, the two fields do not affect the count. */ - if (l3_mask && is_llc_event(event)) { - int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4); - - if (smp_num_siblings > 1) - thread += cpu_data(event->cpu).apicid & 1; - - hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) & - AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK; - } + if (l3_mask && is_llc_event(event)) + hwc->config |= l3_thread_slice_mask(event->cpu); uncore = event_to_amd_uncore(event); if (!uncore) -- cgit v1.2.3 From 9689dbbeaea884d19e3085439c6a247ef986b2af Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 13 Mar 2020 18:10:23 -0500 Subject: perf/amd/uncore: Make L3 thread mask code more readable Convert the l3_thread_slice_mask() function to use the more readable topology_* helper functions, more intuitive variable names like shift and thread_mask, and BIT_ULL(). No functional changes. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200313231024.17601-2-kim.phillips@amd.com --- arch/x86/events/amd/uncore.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 2abcb1abd07c..07af497b517f 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -185,13 +185,16 @@ static void amd_uncore_del(struct perf_event *event, int flags) */ static u64 l3_thread_slice_mask(int cpu) { - int thread = 2 * (cpu_data(cpu).cpu_core_id % 4); + u64 thread_mask, core = topology_core_id(cpu); + unsigned int shift, thread = 0; - if (smp_num_siblings > 1) - thread += cpu_data(cpu).apicid & 1; + if (topology_smt_supported() && !topology_is_primary_thread(cpu)) + thread = 1; - return (1ULL << (AMD64_L3_THREAD_SHIFT + thread) & - AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK; + shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread; + thread_mask = BIT_ULL(shift); + + return AMD64_L3_SLICE_MASK | thread_mask; } static int amd_uncore_event_init(struct perf_event *event) -- cgit v1.2.3 From e48667b865480d8bf0f1171a8b474ffc785b9ace Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 13 Mar 2020 18:10:24 -0500 Subject: perf/amd/uncore: Add support for Family 19h L3 PMU Family 19h introduces change in slice, core and thread specification in its L3 Performance Event Select (ChL3PmcCfg) h/w register. 
The change is incompatible with Family 17h's version of the register. Introduce a new path in l3_thread_slice_mask() to do things differently for Family 19h vs. Family 17h, otherwise the new hardware doesn't get programmed correctly. Instead of a linear core--thread bitmask, Family 19h takes an encoded core number, and a separate thread mask. There are new bits that are set for all cores and all slices, of which only the latter is used, since the driver counts events for all slices on behalf of the specified CPU. Also update amd_uncore_init() to base its L2/NB vs. L3/Data Fabric mode decision based on Family 17h or above, not just 17h and 18h: the Family 19h Data Fabric PMC is compatible with the Family 17h DF PMC. [ bp: Touchups. ] Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200313231024.17601-3-kim.phillips@amd.com --- arch/x86/events/amd/uncore.c | 20 ++++++++++++++------ arch/x86/include/asm/perf_event.h | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 07af497b517f..46018e515fe2 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -191,10 +191,18 @@ static u64 l3_thread_slice_mask(int cpu) if (topology_smt_supported() && !topology_is_primary_thread(cpu)) thread = 1; - shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread; + if (boot_cpu_data.x86 <= 0x18) { + shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread; + thread_mask = BIT_ULL(shift); + + return AMD64_L3_SLICE_MASK | thread_mask; + } + + core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK; + shift = AMD64_L3_THREAD_SHIFT + thread; thread_mask = BIT_ULL(shift); - return AMD64_L3_SLICE_MASK | thread_mask; + return AMD64_L3_EN_ALL_SLICES | core | thread_mask; } static int amd_uncore_event_init(struct perf_event *event) @@ -223,8 +231,8 @@ static int amd_uncore_event_init(struct perf_event *event) return -EINVAL; /* - * SliceMask and ThreadMask need to be set for certain L3 events in - * Family 17h. For other events, the two fields do not affect the count. + * SliceMask and ThreadMask need to be set for certain L3 events. + * For other events, the two fields do not affect the count. */ if (l3_mask && is_llc_event(event)) hwc->config |= l3_thread_slice_mask(event->cpu); @@ -533,9 +541,9 @@ static int __init amd_uncore_init(void) if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) return -ENODEV; - if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) { + if (boot_cpu_data.x86 >= 0x17) { /* - * For F17h or F18h, the Northbridge counters are + * For F17h and above, the Northbridge counters are * repurposed as Data Fabric counters. Also, L3 * counters are supported too. The PMUs are exported * based on family as either L2 or L3 and NB or DF. 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 29964b0e1075..e855e9cf2c37 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -50,11 +50,22 @@ #define AMD64_L3_SLICE_SHIFT 48 #define AMD64_L3_SLICE_MASK \ - ((0xFULL) << AMD64_L3_SLICE_SHIFT) + (0xFULL << AMD64_L3_SLICE_SHIFT) +#define AMD64_L3_SLICEID_MASK \ + (0x7ULL << AMD64_L3_SLICE_SHIFT) #define AMD64_L3_THREAD_SHIFT 56 #define AMD64_L3_THREAD_MASK \ - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + (0xFFULL << AMD64_L3_THREAD_SHIFT) +#define AMD64_L3_F19H_THREAD_MASK \ + (0x3ULL << AMD64_L3_THREAD_SHIFT) + +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) + +#define AMD64_L3_COREID_SHIFT 42 +#define AMD64_L3_COREID_MASK \ + (0x7ULL << AMD64_L3_COREID_SHIFT) #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ -- cgit v1.2.3 From 3b7a15b0643d42e4dca78c5aed8f1ad209a3d1ab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 12 Mar 2020 22:31:29 -0700 Subject: perf tools: Give synthetic mmap events an inode generation When mmap2 events are synthesized the ino_generation field isn't being set leading to uninitialized memory being compared. Caught with clang's -fsanitize=memory: ==124733==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x55a96a6a65cc in __dso_id__cmp tools/perf/util/dsos.c:23:6 #1 0x55a96a6a81d5 in dso_id__cmp tools/perf/util/dsos.c:38:9 #2 0x55a96a6a717f in __dso__cmp_long_name tools/perf/util/dsos.c:74:15 #3 0x55a96a6a6c4c in __dsos__findnew_link_by_longname_id tools/perf/util/dsos.c:106:12 #4 0x55a96a6a851e in __dsos__findnew_by_longname_id tools/perf/util/dsos.c:178:9 #5 0x55a96a6a7798 in __dsos__find_id tools/perf/util/dsos.c:191:9 #6 0x55a96a6a7b57 in __dsos__findnew_id tools/perf/util/dsos.c:251:20 #7 0x55a96a6a7a57 in dsos__findnew_id tools/perf/util/dsos.c:259:17 #8 0x55a96a7776ae in machine__findnew_dso_id tools/perf/util/machine.c:2709:9 #9 0x55a96a77dfcf in map__new tools/perf/util/map.c:193:10 #10 0x55a96a77240a in machine__process_mmap2_event tools/perf/util/machine.c:1670:8 #11 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #12 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #13 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #14 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #15 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #16 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #17 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #18 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #19 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #20 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #21 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #22 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #23 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #24 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #25 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #26 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #27 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #28 0x55a96a282223 in main tools/perf/perf.c:538:3 Uninitialized value was stored to memory at #1 
0x55a96a6a18f7 in dso__new_id tools/perf/util/dso.c:1230:14 #2 0x55a96a6a78ee in __dsos__addnew_id tools/perf/util/dsos.c:233:20 #3 0x55a96a6a7bcc in __dsos__findnew_id tools/perf/util/dsos.c:252:21 #4 0x55a96a6a7a57 in dsos__findnew_id tools/perf/util/dsos.c:259:17 #5 0x55a96a7776ae in machine__findnew_dso_id tools/perf/util/machine.c:2709:9 #6 0x55a96a77dfcf in map__new tools/perf/util/map.c:193:10 #7 0x55a96a77240a in machine__process_mmap2_event tools/perf/util/machine.c:1670:8 #8 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #9 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #10 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #11 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #12 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #13 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #14 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #15 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #16 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #17 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #18 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #19 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 Uninitialized value was stored to memory at #0 0x55a96a7725af in machine__process_mmap2_event tools/perf/util/machine.c:1646:25 #1 0x55a96a7741a3 in machine__process_event tools/perf/util/machine.c:1882:9 #2 0x55a96a6aee39 in perf_event__process tools/perf/util/event.c:454:9 #3 0x55a96a87d633 in perf_tool__process_synth_event tools/perf/util/synthetic-events.c:63:9 #4 0x55a96a87f131 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:403:7 #5 0x55a96a8815d6 in __event__synthesize_thread tools/perf/util/synthetic-events.c:548:9 #6 0x55a96a882bff in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:681:3 #7 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #8 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #9 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #10 0x55a96a561fa0 in test__mmap_thread_lookup tools/perf/tests/mmap-thread-lookup.c:230:2 #11 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #12 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #13 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #14 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #15 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #16 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #17 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #18 0x55a96a282223 in main tools/perf/perf.c:538:3 Uninitialized value was created by a heap allocation #0 0x55a96a22f60d in malloc llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:925:3 #1 0x55a96a882948 in __perf_event__synthesize_threads tools/perf/util/synthetic-events.c:655:15 #2 0x55a96a881ec2 in perf_event__synthesize_threads tools/perf/util/synthetic-events.c:750:9 #3 0x55a96a562b26 in synth_all tools/perf/tests/mmap-thread-lookup.c:136:9 #4 0x55a96a5623b1 in mmap_events tools/perf/tests/mmap-thread-lookup.c:174:8 #5 0x55a96a561fa0 in test__mmap_thread_lookup 
tools/perf/tests/mmap-thread-lookup.c:230:2 #6 0x55a96a52c182 in run_test tools/perf/tests/builtin-test.c:378:9 #7 0x55a96a52afc1 in test_and_print tools/perf/tests/builtin-test.c:408:9 #8 0x55a96a52966e in __cmd_test tools/perf/tests/builtin-test.c:603:4 #9 0x55a96a52855d in cmd_test tools/perf/tests/builtin-test.c:747:9 #10 0x55a96a2844d4 in run_builtin tools/perf/perf.c:312:11 #11 0x55a96a282bd0 in handle_internal_command tools/perf/perf.c:364:8 #12 0x55a96a284097 in run_argv tools/perf/perf.c:408:2 #13 0x55a96a282223 in main tools/perf/perf.c:538:3 SUMMARY: MemorySanitizer: use-of-uninitialized-value tools/perf/util/dsos.c:23:6 in __dso_id__cmp Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200313053129.131264-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index dd3e6f43fb86..3f28af39f9c6 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -345,6 +345,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, continue; event->mmap2.ino = (u64)ino; + event->mmap2.ino_generation = 0; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c -- cgit v1.2.3 From c3b10649a80e9da2892c1fd3038c53abd57588f6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 13 Mar 2020 21:46:07 +0800 Subject: perf report: Fix no branch type statistics report issue Previously we could get the report of branch type statistics. For example: # perf record -j any,save_type ... # perf report --stdio # # Branch Statistics: # COND_FWD: 40.6% COND_BWD: 4.1% CROSS_4K: 24.7% CROSS_2M: 12.3% COND: 44.7% UNCOND: 0.0% IND: 6.1% CALL: 24.5% RET: 24.7% But recent perf can't report the branch type statistics anymore. This is a regression caused by commit 40c39e304641 ("perf report: Fix a no annotate browser displayed issue"), which only counts the branch type statistics in browser mode. This patch moves the branch_type_count() call outside of the ui__has_annotation() check, so branch type statistics also work in stdio mode.
Fixes: 40c39e304641 ("perf report: Fix a no annotate browser displayed issue") Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200313134607.12873-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d7c905f7520f..5f4045df76f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -186,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, { struct hist_entry *he = iter->he; struct report *rep = arg; - struct branch_info *bi; + struct branch_info *bi = he->branch_info; struct perf_sample *sample = iter->sample; struct evsel *evsel = iter->evsel; int err; + branch_type_count(&rep->brtype_stat, &bi->flags, + bi->from.addr, bi->to.addr); + if (!ui__has_annotation() && !rep->symbol_ipc) return 0; - bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); - branch_type_count(&rep->brtype_stat, &bi->flags, - bi->from.addr, bi->to.addr); - out: return err; } -- cgit v1.2.3 From 59a08b4b3b1a9374adacd13cd7544c03e5582e0e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Mar 2020 16:56:09 +0100 Subject: perf expr: Fix copy/paste mistake Copy/paste leftover from recent refactor. Fixes: 26226a97724d ("perf expr: Move expr lexer to flex") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kajol Jain Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200315155609.603948-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 1928f2a3dddc..eaad29243c23 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -79,10 +79,10 @@ symbol {spec}*{sym}*{spec}*{sym}* { int start_token; - start_token = parse_events_get_extra(yyscanner); + start_token = expr_get_extra(yyscanner); if (start_token) { - parse_events_set_extra(NULL, yyscanner); + expr_set_extra(NULL, yyscanner); return start_token; } } -- cgit v1.2.3 From d8a738689794c42c3844284b99ddf165d10a724e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2020 10:21:30 +0100 Subject: x86/optprobe: Fix OPTPROBE vs UACCESS While looking at an objtool UACCESS warning, it suddenly occurred to me that it is entirely possible to have an OPTPROBE right in the middle of an UACCESS region. In this case we must of course clear FLAGS.AC while running the KPROBE. Luckily the trampoline already saves/restores [ER]FLAGS, so all we need to do is inject a CLAC. Unfortunately we cannot use ALTERNATIVE() in the trampoline text, so we have to frob that manually. 
Fixes: ca0bbc70f147 ("sched/x86_64: Don't save flags on context switch") Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200305092130.GU2596@hirez.programming.kicks-ass.net --- arch/x86/include/asm/kprobes.h | 1 + arch/x86/kernel/kprobes/opt.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 95b1f053bd96..073eb7ad2f56 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -36,6 +36,7 @@ typedef u8 kprobe_opcode_t; /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_clac[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3f45b5c43a71..ea13f6888284 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -71,6 +71,21 @@ found: return (unsigned long)buf; } +static void synthesize_clac(kprobe_opcode_t *addr) +{ + /* + * Can't be static_cpu_has() due to how objtool treats this feature bit. + * This isn't a fast path anyway. + */ + if (!boot_cpu_has(X86_FEATURE_SMAP)) + return; + + /* Replace the NOP3 with CLAC */ + addr[0] = 0x0f; + addr[1] = 0x01; + addr[2] = 0xca; +} + /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { @@ -92,6 +107,9 @@ asm ( /* We don't bother saving the ss register */ " pushq %rsp\n" " pushfq\n" + ".global optprobe_template_clac\n" + "optprobe_template_clac:\n" + ASM_NOP3 SAVE_REGS_STRING " movq %rsp, %rsi\n" ".global optprobe_template_val\n" @@ -111,6 +129,9 @@ asm ( #else /* CONFIG_X86_32 */ " pushl %esp\n" " pushfl\n" + ".global optprobe_template_clac\n" + "optprobe_template_clac:\n" + ASM_NOP3 SAVE_REGS_STRING " movl %esp, %edx\n" ".global optprobe_template_val\n" @@ -134,6 +155,8 @@ asm ( void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); +#define TMPL_CLAC_IDX \ + ((long)optprobe_template_clac - (long)optprobe_template_entry) #define TMPL_MOVE_IDX \ ((long)optprobe_template_val - (long)optprobe_template_entry) #define TMPL_CALL_IDX \ @@ -389,6 +412,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, op->optinsn.size = ret; len = TMPL_END_IDX + op->optinsn.size; + synthesize_clac(buf + TMPL_CLAC_IDX); + /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); -- cgit v1.2.3 From 90c91dfb86d0ff545bd329d3ddd72c147e2ae198 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2020 13:38:51 +0100 Subject: perf/core: Fix endless multiplex timer Kan and Andi reported that we fail to kill rotation when the flexible events go empty, but the context does not. 
Make sure rotate_necessary is cleared whenever the flexible events go away: clear it when the last event is removed from the context, when the flexible events are scheduled out, and before picking the next event to rotate; ctx_flexible_sched_in() sets it again when events remain unscheduled. Fixes: fd7d55172d1e ("perf/cgroups: Don't rotate events for cgroups unnecessarily") Reported-by: Andi Kleen Reported-by: Kan Liang Tested-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200305123851.GX2596@hirez.programming.kicks-ass.net --- kernel/events/core.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ccf8d4fc6374..b5a68d26f0b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ __perf_remove_from_context(struct perf_event *event, if (!ctx->nr_events && ctx->is_active) { ctx->is_active = 0; + ctx->rotate_necessary = 0; if (ctx->task) { WARN_ON_ONCE(cpuctx->task_ctx != ctx); cpuctx->task_ctx = NULL; @@ -3188,12 +3189,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (!ctx->nr_active || !(is_active & EVENT_ALL)) return; - /* - * If we had been multiplexing, no rotations are necessary, now no events - * are active. - */ - ctx->rotate_necessary = 0; - perf_pmu_disable(ctx->pmu); if (is_active & EVENT_PINNED) { list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list) @@ -3203,6 +3198,13 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (is_active & EVENT_FLEXIBLE) { list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list) group_sched_out(event, cpuctx, ctx); + + /* + * Since we cleared EVENT_FLEXIBLE, also clear + * rotate_necessary; it will be reset by + * ctx_flexible_sched_in() when needed. + */ + ctx->rotate_necessary = 0; } perf_pmu_enable(ctx->pmu); } @@ -3985,6 +3987,12 @@ ctx_event_to_rotate(struct perf_event_context *ctx) typeof(*event), group_node); } + /* + * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in() + * finds there are unschedulable events, it will set it again. + */ + ctx->rotate_necessary = 0; + return event; } -- cgit v1.2.3 From a6763625ae6f8aa5ee82fcd8fa4e5e38db20dbc6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Mar 2020 13:56:37 +0300 Subject: perf/core: Fix reversed NULL check in perf_event_groups_less() This NULL check is reversed so it leads to a Smatch warning and presumably a NULL dereference. kernel/events/core.c:1598 perf_event_groups_less() error: we previously assumed 'right->cgrp->css.cgroup' could be null (see line 1590) Fixes: 95ed6c707f26 ("perf/cgroup: Order events in RB tree by cgroup id") Signed-off-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200312105637.GA8960@mwanda --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b5a68d26f0b9..d22e4ba59dfa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1586,7 +1586,7 @@ perf_event_groups_less(struct perf_event *left, struct perf_event *right) */ return true; } - if (!right->cgrp || right->cgrp->css.cgroup) { + if (!right->cgrp || !right->cgrp->css.cgroup) { /* * Right has no cgroup but left does, no cgroups come * first. -- cgit v1.2.3 From bc88a2fe216a51e8ab46d61f89d0c1b5a400470e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 17 Mar 2020 11:38:32 -0700 Subject: perf/x86/intel/uncore: Add box_offsets for free-running counters The offset between uncore boxes of free-running counters varies, e.g. IIO free-running counters on Ice Lake server. Add box_offsets, an array of offsets between adjacent uncore boxes.
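As an illustration only, a platform with irregularly spaced boxes could describe them like this (the struct fields are from the patch below, but the symbol names and offset values are invented placeholders, not real Ice Lake server data):

  /* Hypothetical spacing of four IIO boxes relative to box 0 */
  static unsigned int icx_iio_box_offsets[] = { 0x0, 0x20, 0x48, 0x70 };

  static struct freerunning_counters icx_iio_freerunning[] = {
          {
                  .counter_base   = 0x0a40,       /* placeholder MSR address */
                  .num_counters   = 1,
                  .bits           = 48,
                  .box_offsets    = icx_iio_box_offsets,
          },
  };

When box_offsets is NULL, uncore_freerunning_counter() keeps the old linear box_offset * pmu_idx computation, so existing platforms are unaffected.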
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1584470314-46657-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 1204dcc9fe9b..b30429f8a53a 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -154,6 +154,7 @@ struct freerunning_counters { unsigned int box_offset; unsigned int num_counters; unsigned int bits; + unsigned *box_offsets; }; struct pci2phy_map { @@ -310,7 +311,9 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, return pmu->type->freerunning[type].counter_base + pmu->type->freerunning[type].counter_offset * idx + - pmu->type->freerunning[type].box_offset * pmu->pmu_idx; + (pmu->type->freerunning[type].box_offsets ? + pmu->type->freerunning[type].box_offsets[pmu->pmu_idx] : + pmu->type->freerunning[type].box_offset * pmu->pmu_idx); } static inline -- cgit v1.2.3 From 3442a9ecb8e72a33c28a2b969b766c659830e410 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 17 Mar 2020 11:38:33 -0700 Subject: perf/x86/intel/uncore: Factor out __snr_uncore_mmio_init_box The IMC uncore unit in Ice Lake server can only be accessed by MMIO, which is similar as Snow Ridge. Factor out __snr_uncore_mmio_init_box which can be shared with Ice Lake server in the following patch. No functional changes. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1584470314-46657-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index ad20220af303..01023f0d935b 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4380,10 +4380,10 @@ static struct pci_dev *snr_uncore_get_mc_dev(int id) return mc_dev; } -static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset) { struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); - unsigned int box_ctl = uncore_mmio_box_ctl(box); resource_size_t addr; u32 pci_dword; @@ -4393,7 +4393,7 @@ static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; - pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); + pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; addr += box_ctl; @@ -4405,6 +4405,12 @@ static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); } +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), + SNR_IMC_MMIO_MEM0_OFFSET); +} + static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) { u32 config; -- cgit v1.2.3 From ba5bade4cc0d2013cdf5634dae554693c968a090 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:46 +0100 Subject: x86/devicetable: Move x86 specific macro out of generic code There is no reason that this gunk is in a generic header file. The wildcard defines need to stay as they are required by file2alias. 
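For context, a typical consumer builds a match table with this macro and checks it via x86_match_cpu(); a sketch modeled on the KVM usage converted later in this series (the probe helper name is made up):

  #include <linux/module.h>
  #include <asm/cpu_device_id.h>

  static const struct x86_cpu_id svm_cpu_id[] = {
          X86_FEATURE_MATCH(X86_FEATURE_SVM),
          {}
  };
  MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

  /* hypothetical probe helper */
  static int has_svm(void)
  {
          return x86_match_cpu(svm_cpu_id) ? 0 : -ENODEV;
  }

The MODULE_DEVICE_TABLE() entry is what file2alias turns into a module alias, which is why the wildcard constants themselves must stay visible in mod_devicetable.h.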
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131508.736205164@linutronix.de --- arch/x86/include/asm/cpu_device_id.h | 13 ++++++++++++- arch/x86/kvm/svm.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + drivers/cpufreq/acpi-cpufreq.c | 1 + drivers/cpufreq/amd_freq_sensitivity.c | 1 + include/linux/mod_devicetable.h | 4 +--- 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 31c379c1da41..a28dc6ba5be1 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -6,9 +6,20 @@ * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions */ - #include +/* + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. + */ + +#define X86_FEATURE_MATCH(x) { \ + .vendor = X86_VENDOR_ANY, \ + .family = X86_FAMILY_ANY, \ + .model = X86_MODEL_ANY, \ + .feature = x, \ +} + /* * Match specific microcode revisions. * diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bef0ba35f121..5753fc3bcfc1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "trace.h" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3be25ecae145..31c80fa4653d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d6f7df33ab8c..7ff659525587 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -30,6 +30,7 @@ #include #include #include +#include MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index e2df9d112106..5107cbe2d64d 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -18,6 +18,7 @@ #include #include +#include #include "cpufreq_ondemand.h" diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..f8b66d43acf6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -667,9 +667,7 @@ struct x86_cpu_id { kernel_ulong_t driver_data; }; -#define X86_FEATURE_MATCH(x) \ - { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x } - +/* Wild cards for x86_cpu_id::vendor, family, model and feature */ #define X86_VENDOR_ANY 0xffff #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 -- cgit v1.2.3 From 20d437447c0089cda46c683db219d3b4e2cde40e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:47 +0100 Subject: x86/cpu: Add consistent CPU match macros Finding all places which build x86_cpu_id match tables is tedious and the logic is hidden in lots of differently named macro wrappers. Most of these initializer macros use plain C89 initializers which rely on the ordering of the struct members. So new members could only be added at the end of the struct, but that's ugly as hell and C99 initializers are really the right thing to use. 
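To make the initializer argument concrete, here is the same x86_cpu_id entry in both styles (0x3a, the Ivy Bridge model number, is just an example):

  static const struct x86_cpu_id example_ids[] = {
          /* C89, positional: silently breaks if members are added or reordered */
          { X86_VENDOR_INTEL, 6, 0x3a, X86_FEATURE_ANY, 0 },

          /* C99, designated: robust against struct layout changes */
          { .vendor = X86_VENDOR_INTEL, .family = 6, .model = 0x3a,
            .feature = X86_FEATURE_ANY },
          {}
  };
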
Provide a set of macros which: - Have a proper naming scheme, starting with X86_MATCH_ - Use C99 initializers The set of provided macros are all subsets of the base macro X86_MATCH_VENDOR_FAM_MODEL_FEATURE() which allows to supply all possible selection criteria: vendor, family, model, feature The other macros shorten this to avoid typing all arguments when they are not needed and would require one of the _ANY constants. They have been created due to the requirements of the existing usage sites. Also add a few model constants for Centaur CPUs and QUARK. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131508.826011988@linutronix.de --- arch/x86/include/asm/cpu_device_id.h | 140 ++++++++++++++++++++++++++++++++--- arch/x86/include/asm/intel-family.h | 6 ++ arch/x86/kernel/cpu/match.c | 13 +++- 3 files changed, 146 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index a28dc6ba5be1..f11770fac73a 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -5,21 +5,143 @@ /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions - */ -#include - -/* + * * The wildcard initializers are in mod_devicetable.h because * file2alias needs them. Sigh. */ +#include +/* Get the INTEL_FAM* model defines */ +#include +/* And the X86_VENDOR_* ones */ +#include -#define X86_FEATURE_MATCH(x) { \ - .vendor = X86_VENDOR_ANY, \ - .family = X86_FAMILY_ANY, \ - .model = X86_MODEL_ANY, \ - .feature = x, \ +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +/** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \ + _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ } +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. 
+ */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ + X86_MODEL_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) + +/* Transitional to keep the existing code working */ +#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ + X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set of wildcards. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) + +/** + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model + * @model: The model name without the INTEL_FAM6_ prefix or ANY + * The model name is expanded to INTEL_FAM6_@model internally + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The vendor is set to INTEL, the family to 6 and all other missing + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. + * + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information. + */ +#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) + /* * Match specific microcode revisions. 
* diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 4981c293f926..236a87aa84d6 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -35,6 +35,9 @@ * The #define line may optionally include a comment including platform names. */ +/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ +#define INTEL_FAM6_ANY X86_MODEL_ANY + #define INTEL_FAM6_CORE_YONAH 0x0E #define INTEL_FAM6_CORE2_MEROM 0x0F @@ -118,6 +121,9 @@ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +/* Family 5 */ +#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ + /* Useful macros */ #define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ { \ diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 6dd78d8235e4..d3482eb43ff3 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -16,12 +16,17 @@ * respective wildcard entries. * * A typical table entry would be to match a specific CPU - * { X86_VENDOR_INTEL, 6, 0x12 } - * or to match a specific CPU feature - * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } + * + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, + * X86_FEATURE_ANY, NULL); * * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, - * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) + * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) + * + * asm/cpu_device_id.h contains a set of useful macros which are shortcuts + * for various common selections. The above can be shortened to: + * + * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL); * * Arrays used to match for this should also be declared using * MODULE_DEVICE_TABLE(x86cpu, ...) -- cgit v1.2.3 From f6d502fcfc51ae025f18a8de8f1ed2ab34503742 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:48 +0100 Subject: x86/cpu/bugs: Convert to new matching macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. The local wrappers have to stay as they are tailored to tame the hardware vulnerability mess. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131508.934926587@linutronix.de --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 52c9bfbbdb2a..f4c672e5ce9e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1008,8 +1008,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) -#define VULNWL(_vendor, _family, _model, _whitelist) \ - { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } +#define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) #define VULNWL_INTEL(model, whitelist) \ VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) -- cgit v1.2.3 From ef37219ab828c9ead544589ed33cd94f9273d7c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:49 +0100 Subject: x86/perf/events: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency.
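On the consuming side nothing changes except clarity: driver_data visibly carries a pointer (cast to unsigned long by the macro), which the probe code retrieves via x86_match_cpu(). A sketch modeled on the cstate probe path (variable names are illustrative):

  const struct x86_cpu_id *id = x86_match_cpu(intel_cstates_match);
  const struct cstate_model *cm;

  if (!id)
          return -ENODEV;

  /* the &snb_cstates / &hswult_cstates pointer stored in the table entry */
  cm = (const struct cstate_model *)id->driver_data;
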
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.029267418@linutronix.de --- arch/x86/events/amd/power.c | 2 +- arch/x86/events/intel/cstate.c | 83 ++++++++++++++++++++---------------------- arch/x86/events/intel/rapl.c | 58 ++++++++++++++--------------- arch/x86/events/intel/uncore.c | 63 +++++++++++++++----------------- 4 files changed, 97 insertions(+), 109 deletions(-) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index abef51320e3a..43b09e9c93a2 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -259,7 +259,7 @@ static int power_cpu_init(unsigned int cpu) } static const struct x86_cpu_id cpu_match[] = { - { .vendor = X86_VENDOR_AMD, .family = 0x15 }, + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), {}, }; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4814c964692c..e4aa20c0426f 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -594,63 +594,60 @@ static const struct cstate_model glm_cstates __initconst = { }; -#define X86_CSTATES_MODEL(model, states) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } - static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates), 
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 09913121e726..a5dbd25852cb 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -668,9 +668,6 @@ static int __init init_rapl_pmus(void) return 0; } -#define X86_RAPL_MODEL_MATCH(model, init) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } - static struct rapl_model model_snb = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | @@ -716,36 +713,35 @@ static struct rapl_model model_skl = { }; static const struct x86_cpu_id rapl_model_match[] __initconst = { - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw), - 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), {}, }; - MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86467f85c383..ac93705a2127 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1392,10 +1392,6 @@ err: return ret; } - -#define X86_UNCORE_MODEL_MATCH(model, init) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } - struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); @@ -1477,38 +1473,37 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = { }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), - 
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, bdw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; - MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); static int __init intel_uncore_init(void) -- cgit v1.2.3 From 320debe5ef6d4990a70535b5cf891472e6e14d93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: 
Fri, 20 Mar 2020 14:13:50 +0100 Subject: x86/kvm: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.136884777@linutronix.de --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5753fc3bcfc1..7c959ae20cd4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -59,7 +59,7 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); static const struct x86_cpu_id svm_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_SVM), + X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 31c80fa4653d..5b017efcc944 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -66,7 +66,7 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); static const struct x86_cpu_id vmx_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_VMX), + X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); -- cgit v1.2.3 From adefe55e725821e8ae23207992ded5994f1650a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:51 +0100 Subject: x86/kernel: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.250559388@linutronix.de --- arch/x86/kernel/apic/apic.c | 32 +++++++++++++------------------- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tsc_msr.c | 14 +++++++------- arch/x86/power/cpu.c | 16 ++-------------- 4 files changed, 23 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5f973fed3c9f..81b9c63dae1b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -546,12 +546,6 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -#define DEADLINE_MODEL_MATCH_FUNC(model, func) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func } - -#define DEADLINE_MODEL_MATCH_REV(model, rev) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } - static u32 hsx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { @@ -588,23 +582,23 @@ static u32 skx_deadline_rev(void) } static const struct x86_cpu_id deadline_match[] = { - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), + X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev), + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev), + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17), + X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22), + X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20), +
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17), + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL, 0x25), + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2), + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L, 0xb2), + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52), + X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L, 0x52), + X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE, 0x52), {}, }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 69881b2d446c..3076ef0864dd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -466,7 +466,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) */ static const struct x86_cpu_id snc_cpu[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), {} }; diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index e0cbe4f2af49..bf528aae8ece 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -63,13 +63,13 @@ static const struct freq_desc freq_desc_lgm = { }; static const struct x86_cpu_id tsc_msr_cpu_ids[] = { - INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw), - INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv), - INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng), - INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), - INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann), - INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &freq_desc_pnw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_TABLET,&freq_desc_clv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &freq_desc_byt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &freq_desc_tng), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &freq_desc_cht), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &freq_desc_ann), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_NP, &freq_desc_lgm), {} }; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 915bb1639763..aaff9ed7ff45 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -475,20 +475,8 @@ static int msr_save_cpuid_features(const struct x86_cpu_id *c) } static const struct x86_cpu_id msr_save_cpu_table[] = { - { - .vendor = X86_VENDOR_AMD, - .family = 0x15, - .model = X86_MODEL_ANY, - .feature = X86_FEATURE_ANY, - .driver_data = (kernel_ulong_t)msr_save_cpuid_features, - }, - { - .vendor = X86_VENDOR_AMD, - .family = 0x16, - .model = X86_MODEL_ANY, - .feature = X86_FEATURE_ANY, - .driver_data = (kernel_ulong_t)msr_save_cpuid_features, - }, + X86_MATCH_VENDOR_FAM(AMD, 0x15, &msr_save_cpuid_features), + X86_MATCH_VENDOR_FAM(AMD, 0x16, &msr_save_cpuid_features), {} }; -- cgit v1.2.3 From 9595198f8dc4111f8cab39de2c0c4432787bc690 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:52 +0100 Subject: x86/platform: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency.
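The one local wrapper kept below (X86_MATCH in punit_atom_debug.c) also pins a feature bit on top of vendor/family/model. Expanded by hand, an entry built from it is roughly equivalent to the following designated-initializer form (an illustrative sketch of the expansion, not the literal macro body):

        static const struct x86_cpu_id example_ids[] = {
                {
                        /* what X86_MATCH(ATOM_AIRMONT, &punit_device_cht)
                         * boils down to, approximately: */
                        .vendor         = X86_VENDOR_INTEL,
                        .family         = 6,
                        .model          = INTEL_FAM6_ATOM_AIRMONT,
                        .feature        = X86_FEATURE_MWAIT,
                        .driver_data    = (kernel_ulong_t)&punit_device_cht,
                },
                {}
        };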
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.359448901@linutronix.de --- arch/x86/platform/atom/punit_atom_debug.c | 13 ++++++------- arch/x86/platform/efi/quirks.c | 7 ++----- arch/x86/platform/intel-mid/device_libs/platform_bt.c | 5 +---- arch/x86/platform/intel-quark/imr.c | 2 +- arch/x86/platform/intel-quark/imr_selftest.c | 2 +- 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index ee6b0780bea1..f8ed5f66cd20 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -117,17 +117,16 @@ static void punit_dbgfs_unregister(void) debugfs_remove_recursive(punit_dbg_file); } -#define ICPU(model, drv_data) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\ - (kernel_ulong_t)&drv_data } +#define X86_MATCH(model, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + X86_FEATURE_MWAIT, data) static const struct x86_cpu_id intel_punit_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt), - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng), - ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), + X86_MATCH(ATOM_SILVERMONT, &punit_device_byt), + X86_MATCH(ATOM_SILVERMONT_MID, &punit_device_tng), + X86_MATCH(ATOM_AIRMONT, &punit_device_cht), {} }; - MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); static int __init punit_atom_debug_init(void) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 88d32c06cffa..c000e03ecfe3 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -659,12 +659,9 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, return 1; } -#define ICPU(family, model, quirk_handler) \ - { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ - (unsigned long)&quirk_handler } - static const struct x86_cpu_id efi_capsule_quirk_ids[] = { - ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, + &qrk_capsule_setup_info), { } }; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c index e3f4bfc08f78..31dda18bb370 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c @@ -60,11 +60,8 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = { .setup = tng_bt_sfi_setup, }; -#define ICPU(model, ddata) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } - static const struct x86_cpu_id bt_sfi_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &tng_bt_sfi_data), {} }; diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index e9d97d52475e..0286fe1b14b5 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -569,7 +569,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev) } static const struct x86_cpu_id imr_ids[] __initconst = { - { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. 
*/ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), {} }; diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index 4307830e1b6f..570e3062faac 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -105,7 +105,7 @@ static void __init imr_self_test(void) } static const struct x86_cpu_id imr_ids[] __initconst = { - { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), {} }; -- cgit v1.2.3 From e36cf2f768467cff824e7da87aedc4e99e4c8396 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:53 +0100 Subject: ACPI: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Rename the local macro wrapper to X86_MATCH for consistency. It stays for readability's sake. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.467730627@linutronix.de --- drivers/acpi/acpi_lpss.c | 6 ++---- drivers/acpi/x86/utils.c | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index db18df6cb330..dee999938213 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -306,11 +306,9 @@ static const struct lpss_device_desc bsw_spi_dev_desc = { .setup = lpss_deassert_reset, }; -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - static const struct x86_cpu_id lpss_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */ - ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), {} }; diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index 697a6b12d6b9..bdc1ba00aee9 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -37,7 +37,7 @@ struct always_present_id { const char *uid; }; -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } +#define X86_MATCH(model) X86_MATCH_INTEL_FAM6_MODEL(model, NULL) #define ENTRY(hid, uid, cpu_models, dmi...) { \ { { hid, }, {} }, \ @@ -51,29 +51,29 @@ static const struct always_present_id always_present_ids[] = { /* * Bay / Cherry Trail PWM directly poked by GPU driver in win10, * but Linux uses a separate PWM driver, harmless if not used. */ - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}), - ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), + ENTRY("80860F09", "1", X86_MATCH(ATOM_SILVERMONT), {}), + ENTRY("80862288", "1", X86_MATCH(ATOM_AIRMONT), {}), /* Lenovo Yoga Book uses PWM2 for keyboard backlight control */ - ENTRY("80862289", "2", ICPU(INTEL_FAM6_ATOM_AIRMONT), { + ENTRY("80862289", "2", X86_MATCH(ATOM_AIRMONT), { DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), }), /* * The INT0002 device is necessary to clear wakeup interrupt sources * on Cherry Trail devices, without it we get nobody cared IRQ msgs. */ - ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), + ENTRY("INT0002", "1", X86_MATCH(ATOM_AIRMONT), {}), /* * On the Dell Venue 11 Pro 7130 and 7139, the DSDT hides * the touchscreen ACPI device until a certain time * after _SB.PCI0.GFX0.LCD.LCD1._ON gets called has passed * *and* _STA has been called at least 3 times since.
*/ - ENTRY("SYNA7500", "1", ICPU(INTEL_FAM6_HASWELL_L), { + ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), }), - ENTRY("SYNA7500", "1", ICPU(INTEL_FAM6_HASWELL_L), { + ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7139"), }), @@ -89,19 +89,19 @@ static const struct always_present_id always_present_ids[] = { * was copy-pasted from the GPD win, so it has a disabled KIOX000A * node which we should not enable, thus we also check the BIOS date. */ - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), DMI_MATCH(DMI_BIOS_DATE, "02/21/2017") }), - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), DMI_MATCH(DMI_BIOS_DATE, "03/20/2017") }), - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), -- cgit v1.2.3 From b11d77fa300d98704519238a2161bc6352c28245 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 24 Mar 2020 14:51:51 +0100 Subject: cpufreq: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid the of most local macro wrappers for consistency. The ones which make sense for readability are renamed to X86_MATCH*. In the centrino driver this also removes the two extra duplicates of family 6 model 13 which have no value at all. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Acked-by: Rafael J. 
Wysocki Link: https://lkml.kernel.org/r/87eetheu88.fsf@nanos.tec.linutronix.de --- drivers/cpufreq/acpi-cpufreq.c | 4 +- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/e_powersaver.c | 2 +- drivers/cpufreq/elanfreq.c | 2 +- drivers/cpufreq/intel_pstate.c | 71 +++++++++++++++++----------------- drivers/cpufreq/longhaul.c | 2 +- drivers/cpufreq/longrun.c | 3 +- drivers/cpufreq/p4-clockmod.c | 2 +- drivers/cpufreq/powernow-k6.c | 4 +- drivers/cpufreq/powernow-k7.c | 2 +- drivers/cpufreq/powernow-k8.c | 2 +- drivers/cpufreq/sc520_freq.c | 2 +- drivers/cpufreq/speedstep-centrino.c | 14 ++----- drivers/cpufreq/speedstep-ich.c | 10 ++--- drivers/cpufreq/speedstep-smi.c | 10 ++--- 15 files changed, 59 insertions(+), 73 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 7ff659525587..289e8ce3fd13 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -992,8 +992,8 @@ late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); static const struct x86_cpu_id acpi_cpufreq_ids[] = { - X86_FEATURE_MATCH(X86_FEATURE_ACPI), - X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE), + X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL), + X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 5107cbe2d64d..f7c4206d4c90 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -145,7 +145,7 @@ static void __exit amd_freq_sensitivity_exit(void) module_exit(amd_freq_sensitivity_exit); static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { - X86_FEATURE_MATCH(X86_FEATURE_PROC_FEEDBACK), + X86_MATCH_FEATURE(X86_FEATURE_PROC_FEEDBACK, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, amd_freq_sensitivity_ids); diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 45c18c6b8081..776a58bab0ff 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -385,7 +385,7 @@ static struct cpufreq_driver eps_driver = { /* This driver will work only on Centaur C7 processors with * Enhanced SpeedStep/PowerSaver registers */ static const struct x86_cpu_id eps_cpu_id[] = { - { X86_VENDOR_CENTAUR, 6, X86_MODEL_ANY, X86_FEATURE_EST }, + X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 6, X86_FEATURE_EST, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, eps_cpu_id); diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index 2242541f7ae3..4ce5eb35dc46 100644 --- a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -198,7 +198,7 @@ static struct cpufreq_driver elanfreq_driver = { }; static const struct x86_cpu_id elan_id[] = { - { X86_VENDOR_AMD, 4, 10, }, + X86_MATCH_VENDOR_FAM_MODEL(AMD, 4, 10, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, elan_id); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c81e1ff29069..780c387f05c0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1908,51 +1908,51 @@ static const struct pstate_funcs knl_funcs = { .get_val = core_get_val, }; -#define ICPU(model, policy) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ - (unsigned long)&policy } +#define X86_MATCH(model, policy) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + X86_FEATURE_APERFMPERF, &policy) static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, 
core_funcs), - ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs), - ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), - ICPU(INTEL_FAM6_HASWELL, core_funcs), - ICPU(INTEL_FAM6_BROADWELL, core_funcs), - ICPU(INTEL_FAM6_IVYBRIDGE_X, core_funcs), - ICPU(INTEL_FAM6_HASWELL_X, core_funcs), - ICPU(INTEL_FAM6_HASWELL_L, core_funcs), - ICPU(INTEL_FAM6_HASWELL_G, core_funcs), - ICPU(INTEL_FAM6_BROADWELL_G, core_funcs), - ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs), - ICPU(INTEL_FAM6_SKYLAKE_L, core_funcs), - ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), - ICPU(INTEL_FAM6_SKYLAKE, core_funcs), - ICPU(INTEL_FAM6_BROADWELL_D, core_funcs), - ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), - ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), - ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs), - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), + X86_MATCH(SANDYBRIDGE, core_funcs), + X86_MATCH(SANDYBRIDGE_X, core_funcs), + X86_MATCH(ATOM_SILVERMONT, silvermont_funcs), + X86_MATCH(IVYBRIDGE, core_funcs), + X86_MATCH(HASWELL, core_funcs), + X86_MATCH(BROADWELL, core_funcs), + X86_MATCH(IVYBRIDGE_X, core_funcs), + X86_MATCH(HASWELL_X, core_funcs), + X86_MATCH(HASWELL_L, core_funcs), + X86_MATCH(HASWELL_G, core_funcs), + X86_MATCH(BROADWELL_G, core_funcs), + X86_MATCH(ATOM_AIRMONT, airmont_funcs), + X86_MATCH(SKYLAKE_L, core_funcs), + X86_MATCH(BROADWELL_X, core_funcs), + X86_MATCH(SKYLAKE, core_funcs), + X86_MATCH(BROADWELL_D, core_funcs), + X86_MATCH(XEON_PHI_KNL, knl_funcs), + X86_MATCH(XEON_PHI_KNM, knl_funcs), + X86_MATCH(ATOM_GOLDMONT, core_funcs), + X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs), + X86_MATCH(SKYLAKE_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { - ICPU(INTEL_FAM6_BROADWELL_D, core_funcs), - ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), + X86_MATCH(BROADWELL_D, core_funcs), + X86_MATCH(BROADWELL_X, core_funcs), + X86_MATCH(SKYLAKE_X, core_funcs), {} }; static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { - ICPU(INTEL_FAM6_KABYLAKE, core_funcs), + X86_MATCH(KABYLAKE, core_funcs), {} }; static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), - ICPU(INTEL_FAM6_SKYLAKE, core_funcs), + X86_MATCH(SKYLAKE_X, core_funcs), + X86_MATCH(SKYLAKE, core_funcs), {} }; @@ -2725,13 +2725,14 @@ static inline void intel_pstate_request_control_from_smm(void) {} #define INTEL_PSTATE_HWP_BROADWELL 0x01 -#define ICPU_HWP(model, hwp_mode) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode } +#define X86_MATCH_HWP(model, hwp_mode) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + X86_FEATURE_APERFMPERF, hwp_mode) static const struct x86_cpu_id hwp_support_ids[] __initconst = { - ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), - ICPU_HWP(INTEL_FAM6_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), - ICPU_HWP(X86_MODEL_ANY, 0), + X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), + X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), + X86_MATCH_HWP(ANY, 0), {} }; diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 92d92e67ae0a..123fb006810d 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -910,7 +910,7 @@ static struct cpufreq_driver longhaul_driver = { }; static const struct x86_cpu_id longhaul_id[] = { - { X86_VENDOR_CENTAUR, 6 }, + X86_MATCH_VENDOR_FAM(CENTAUR, 6, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, 
longhaul_id); diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c index 0b08be8bff76..1caaec7c280b 100644 --- a/drivers/cpufreq/longrun.c +++ b/drivers/cpufreq/longrun.c @@ -281,8 +281,7 @@ static struct cpufreq_driver longrun_driver = { }; static const struct x86_cpu_id longrun_ids[] = { - { X86_VENDOR_TRANSMETA, X86_FAMILY_ANY, X86_MODEL_ANY, - X86_FEATURE_LONGRUN }, + X86_MATCH_VENDOR_FEATURE(TRANSMETA, X86_FEATURE_LONGRUN, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, longrun_ids); diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index efc0b46efada..bb61677c11c7 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -231,7 +231,7 @@ static struct cpufreq_driver p4clockmod_driver = { }; static const struct x86_cpu_id cpufreq_p4_id[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_ACC }, + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_ACC, NULL), {} }; diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index 0196f8129597..41eefef95d87 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -258,8 +258,8 @@ static struct cpufreq_driver powernow_k6_driver = { }; static const struct x86_cpu_id powernow_k6_ids[] = { - { X86_VENDOR_AMD, 5, 12 }, - { X86_VENDOR_AMD, 5, 13 }, + X86_MATCH_VENDOR_FAM_MODEL(AMD, 5, 12, NULL), + X86_MATCH_VENDOR_FAM_MODEL(AMD, 5, 13, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, powernow_k6_ids); diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 5e5171d3eece..5d515fc34836 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -109,7 +109,7 @@ static int check_fsb(unsigned int fsbspeed) } static const struct x86_cpu_id powernow_k7_cpuids[] = { - { X86_VENDOR_AMD, 6, }, + X86_MATCH_VENDOR_FAM(AMD, 6, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, powernow_k7_cpuids); diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 2db2f1739e09..3984959eed1d 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -452,7 +452,7 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, static const struct x86_cpu_id powernow_k8_ids[] = { /* IO based frequency switching */ - { X86_VENDOR_AMD, 0xf }, + X86_MATCH_VENDOR_FAM(AMD, 0xf, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, powernow_k8_ids); diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c index c6f647babaad..73a208559fe2 100644 --- a/drivers/cpufreq/sc520_freq.c +++ b/drivers/cpufreq/sc520_freq.c @@ -95,7 +95,7 @@ static struct cpufreq_driver sc520_freq_driver = { }; static const struct x86_cpu_id sc520_ids[] = { - { X86_VENDOR_AMD, 4, 9 }, + X86_MATCH_VENDOR_FAM_MODEL(AMD, 4, 9, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, sc520_ids); diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index b49f494e0285..75b10ecdb60f 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -520,18 +520,12 @@ static struct cpufreq_driver centrino_driver = { * or ASCII model IDs. 
*/ static const struct x86_cpu_id centrino_ids[] = { - { X86_VENDOR_INTEL, 6, 9, X86_FEATURE_EST }, - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, - { X86_VENDOR_INTEL, 15, 3, X86_FEATURE_EST }, - { X86_VENDOR_INTEL, 15, 4, X86_FEATURE_EST }, + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 9, X86_FEATURE_EST, NULL), + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 13, X86_FEATURE_EST, NULL), + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 3, X86_FEATURE_EST, NULL), + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 4, X86_FEATURE_EST, NULL), {} }; -#if 0 -/* Autoload or not? Do not for now. */ -MODULE_DEVICE_TABLE(x86cpu, centrino_ids); -#endif /** * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index 547fd7af5bf5..f2076d72bf39 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c @@ -319,15 +319,11 @@ static struct cpufreq_driver speedstep_driver = { }; static const struct x86_cpu_id ss_smi_ids[] = { - { X86_VENDOR_INTEL, 6, 0xb, }, - { X86_VENDOR_INTEL, 6, 0x8, }, - { X86_VENDOR_INTEL, 15, 2 }, + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0x8, 0), + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0xb, 0), + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 15, 0x2, 0), {} }; -#if 0 -/* Autoload or not? Do not for now. */ -MODULE_DEVICE_TABLE(x86cpu, ss_smi_ids); -#endif /** * speedstep_init - initializes the SpeedStep CPUFreq driver diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index eeb31bc21cc9..0ce9d4b6dfcc 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -299,15 +299,11 @@ static struct cpufreq_driver speedstep_driver = { }; static const struct x86_cpu_id ss_smi_ids[] = { - { X86_VENDOR_INTEL, 6, 0xb, }, - { X86_VENDOR_INTEL, 6, 0x8, }, - { X86_VENDOR_INTEL, 15, 2 }, + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0x8, 0), + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0xb, 0), + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 15, 0x2, 0), {} }; -#if 0 -/* Not auto loaded currently */ -MODULE_DEVICE_TABLE(x86cpu, ss_smi_ids); -#endif /** * speedstep_init - initializes the SpeedStep CPUFreq driver -- cgit v1.2.3 From 298426211c4b36e1e2475deb941f8fa59d6686c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:55 +0100 Subject: EDAC: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. 
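At probe time the driver_data payload is recovered with x86_match_cpu(). A minimal consumption sketch (the struct, functions and table names here are hypothetical stand-ins for per-model tables such as &apl_ops/&dnv_ops in the pnd2 conversion below):

        #include <linux/module.h>
        #include <asm/cpu_device_id.h>
        #include <asm/intel-family.h>

        struct my_ops {                 /* hypothetical per-model ops */
                int (*setup)(void);
        };

        static int apl_setup(void) { return 0; }
        static struct my_ops my_apl_ops = { .setup = apl_setup };

        static const struct x86_cpu_id my_cpuids[] = {
                X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &my_apl_ops),
                {}
        };

        static int __init my_edac_init(void)
        {
                /* returns the first matching entry, or NULL */
                const struct x86_cpu_id *id = x86_match_cpu(my_cpuids);
                struct my_ops *ops;

                if (!id)
                        return -ENODEV;
                /* driver_data carries the pointer passed into the macro */
                ops = (struct my_ops *)id->driver_data;
                return ops->setup();
        }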
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Acked-by: Tony Luck Link: https://lkml.kernel.org/r/20200320131509.673579000@linutronix.de --- drivers/edac/amd64_edac.c | 14 +++++++------- drivers/edac/i10nm_base.c | 8 ++++---- drivers/edac/pnd2_edac.c | 4 ++-- drivers/edac/sb_edac.c | 14 +++++++------- drivers/edac/skx_base.c | 2 +- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9fbad908a854..f91f3bc1e0b2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3626,13 +3626,13 @@ static void setup_pci_device(void) } static const struct x86_cpu_id amd64_cpuids[] = { - { X86_VENDOR_AMD, 0xF, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, - { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + X86_MATCH_VENDOR_FAM(AMD, 0x0F, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x10, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x16, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL), + X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL), + X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 059eccf0582b..df08de963d10 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -123,10 +123,10 @@ static int i10nm_get_all_munits(void) } static const struct x86_cpu_id i10nm_cpuids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_D, 0, 0 }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_X, 0, 0 }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_D, 0, 0 }, - { } + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), + {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 933f7722b893..bc47328eb485 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1537,8 +1537,8 @@ static struct dunit_ops dnv_ops = { }; static const struct x86_cpu_id pnd2_cpuids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_D, 0, (kernel_ulong_t)&dnv_ops }, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &apl_ops), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &dnv_ops), { } }; MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4957e8ee1879..7d51c82be62b 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3420,13 +3420,13 @@ fail0: } static const struct x86_cpu_id sbridge_cpuids[] = { - INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table), - INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table), - INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table), - INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table), - INTEL_CPU_FAM6(BROADWELL_D, pci_dev_descr_broadwell_table), - INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table), - INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &pci_dev_descr_sbridge_table), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, 
&pci_dev_descr_ibridge_table), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &pci_dev_descr_haswell_table), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &pci_dev_descr_broadwell_table), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &pci_dev_descr_broadwell_table), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &pci_dev_descr_knl_table), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 83545b4facb7..46a3a3440f5e 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -158,7 +158,7 @@ fail: } static const struct x86_cpu_id skx_cpuids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 }, + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), { } }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); -- cgit v1.2.3 From a69b3b1d4cf061d9197d835dcf539d2dd7b9e46f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:56 +0100 Subject: platform/x86: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid the of the local macro wrappers for consistency. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Shevchenko Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.766573641@linutronix.de --- drivers/platform/x86/intel-uncore-frequency.c | 14 ++++++------- drivers/platform/x86/intel_int0002_vgpio.c | 4 ++-- drivers/platform/x86/intel_mid_powerbtn.c | 4 ++-- drivers/platform/x86/intel_pmc_core.c | 24 +++++++++++----------- drivers/platform/x86/intel_pmc_core_pltdrv.c | 16 +++++++-------- .../x86/intel_speed_select_if/isst_if_mbox_msr.c | 4 +--- drivers/platform/x86/intel_telemetry_debugfs.c | 5 ++--- drivers/platform/x86/intel_telemetry_pltdrv.c | 7 ++----- drivers/platform/x86/intel_turbo_max_3.c | 6 ++---- 9 files changed, 37 insertions(+), 47 deletions(-) diff --git a/drivers/platform/x86/intel-uncore-frequency.c b/drivers/platform/x86/intel-uncore-frequency.c index 2b1a0734c3f8..859272075a8f 100644 --- a/drivers/platform/x86/intel-uncore-frequency.c +++ b/drivers/platform/x86/intel-uncore-frequency.c @@ -358,15 +358,13 @@ static struct notifier_block uncore_pm_nb = { .notifier_call = uncore_pm_notify, }; -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - static const struct x86_cpu_id intel_uncore_cpu_ids[] = { - ICPU(INTEL_FAM6_BROADWELL_G), - ICPU(INTEL_FAM6_BROADWELL_X), - ICPU(INTEL_FAM6_BROADWELL_D), - ICPU(INTEL_FAM6_SKYLAKE_X), - ICPU(INTEL_FAM6_ICELAKE_X), - ICPU(INTEL_FAM6_ICELAKE_D), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, NULL), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, NULL), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), {} }; diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c index f14e2c5f9da5..7b23efc46a43 100644 --- a/drivers/platform/x86/intel_int0002_vgpio.c +++ b/drivers/platform/x86/intel_int0002_vgpio.c @@ -148,8 +148,8 @@ static struct irq_chip int0002_cht_irqchip = { }; static const struct x86_cpu_id int0002_cpu_ids[] = { - INTEL_CPU_FAM6(ATOM_SILVERMONT, int0002_byt_irqchip), /* Valleyview, Bay Trail */ - INTEL_CPU_FAM6(ATOM_AIRMONT, int0002_cht_irqchip), /* Braswell, Cherry Trail */ + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &int0002_byt_irqchip), + 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &int0002_cht_irqchip), {} }; diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 6f436836fe50..9c9f209c8a33 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -113,8 +113,8 @@ static const struct mid_pb_ddata mrfld_ddata = { }; static const struct x86_cpu_id mid_pb_cpu_ids[] = { - INTEL_CPU_FAM6(ATOM_SALTWELL_MID, mfld_ddata), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, mrfld_ddata), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &mfld_ddata), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &mrfld_ddata), {} }; diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index 144faa8bad3d..3df33ff50faa 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -871,18 +871,18 @@ static inline void pmc_core_dbgfs_unregister(struct pmc_dev *pmcdev) #endif /* CONFIG_DEBUG_FS */ static const struct x86_cpu_id intel_pmc_core_ids[] = { - INTEL_CPU_FAM6(SKYLAKE_L, spt_reg_map), - INTEL_CPU_FAM6(SKYLAKE, spt_reg_map), - INTEL_CPU_FAM6(KABYLAKE_L, spt_reg_map), - INTEL_CPU_FAM6(KABYLAKE, spt_reg_map), - INTEL_CPU_FAM6(CANNONLAKE_L, cnp_reg_map), - INTEL_CPU_FAM6(ICELAKE_L, icl_reg_map), - INTEL_CPU_FAM6(ICELAKE_NNPI, icl_reg_map), - INTEL_CPU_FAM6(COMETLAKE, cnp_reg_map), - INTEL_CPU_FAM6(COMETLAKE_L, cnp_reg_map), - INTEL_CPU_FAM6(TIGERLAKE_L, tgl_reg_map), - INTEL_CPU_FAM6(TIGERLAKE, tgl_reg_map), - INTEL_CPU_FAM6(ATOM_TREMONT, tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &spt_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &spt_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &spt_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &spt_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnp_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &cnp_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &cnp_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &tgl_reg_map), {} }; diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c index e1266f5c6359..731281855cc8 100644 --- a/drivers/platform/x86/intel_pmc_core_pltdrv.c +++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c @@ -38,14 +38,14 @@ static struct platform_device pmc_core_device = { * other list may grow, but this list should not. 
*/ static const struct x86_cpu_id intel_pmc_core_platform_ids[] = { - INTEL_CPU_FAM6(SKYLAKE_L, pmc_core_device), - INTEL_CPU_FAM6(SKYLAKE, pmc_core_device), - INTEL_CPU_FAM6(KABYLAKE_L, pmc_core_device), - INTEL_CPU_FAM6(KABYLAKE, pmc_core_device), - INTEL_CPU_FAM6(CANNONLAKE_L, pmc_core_device), - INTEL_CPU_FAM6(ICELAKE_L, pmc_core_device), - INTEL_CPU_FAM6(COMETLAKE, pmc_core_device), - INTEL_CPU_FAM6(COMETLAKE_L, pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &pmc_core_device), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &pmc_core_device), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids); diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c index 89b042aecef3..1b6eab071068 100644 --- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c @@ -160,10 +160,8 @@ static struct notifier_block isst_pm_nb = { .notifier_call = isst_pm_notify, }; -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - static const struct x86_cpu_id isst_if_cpu_ids[] = { - ICPU(INTEL_FAM6_SKYLAKE_X), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, isst_if_cpu_ids); diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c index 8e3fb55ac1ae..8a53d3b485b3 100644 --- a/drivers/platform/x86/intel_telemetry_debugfs.c +++ b/drivers/platform/x86/intel_telemetry_debugfs.c @@ -308,11 +308,10 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = { }; static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = { - INTEL_CPU_FAM6(ATOM_GOLDMONT, telem_apl_debugfs_conf), - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &telem_apl_debugfs_conf), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &telem_apl_debugfs_conf), {} }; - MODULE_DEVICE_TABLE(x86cpu, telemetry_debugfs_cpu_ids); static int telemetry_debugfs_check_evts(void) diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c index c4c742bb23cf..987a24e3344e 100644 --- a/drivers/platform/x86/intel_telemetry_pltdrv.c +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c @@ -67,9 +67,6 @@ #define TELEM_CLEAR_VERBOSITY_BITS(x) ((x) &= ~(BIT(27) | BIT(28))) #define TELEM_SET_VERBOSITY_BITS(x, y) ((x) |= ((y) << 27)) -#define TELEM_CPU(model, data) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&data } - enum telemetry_action { TELEM_UPDATE = 0, TELEM_ADD, @@ -183,8 +180,8 @@ static struct telemetry_plt_config telem_glk_config = { }; static const struct x86_cpu_id telemetry_cpu_ids[] = { - TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config), - TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &telem_apl_config), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &telem_glk_config), {} }; diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c index 7b9cc841ab65..892140b62898 100644 --- 
a/drivers/platform/x86/intel_turbo_max_3.c +++ b/drivers/platform/x86/intel_turbo_max_3.c @@ -113,11 +113,9 @@ static int itmt_legacy_cpu_online(unsigned int cpu) return 0; } -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - static const struct x86_cpu_id itmt_legacy_cpu_ids[] = { - ICPU(INTEL_FAM6_BROADWELL_X), - ICPU(INTEL_FAM6_SKYLAKE_X), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), {} }; -- cgit v1.2.3 From 5cfc7ac7c1bf6014e9d22c41a724258d6c37a471 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:57 +0100 Subject: hwmon: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.859324598@linutronix.de --- drivers/hwmon/coretemp.c | 2 +- drivers/hwmon/via-cputemp.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index d855c78fb8be..bb9211215a68 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -709,7 +709,7 @@ static int coretemp_cpu_offline(unsigned int cpu) return 0; } static const struct x86_cpu_id __initconst coretemp_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_DTHERM, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, coretemp_ids); diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c index 8264e849e588..e5d18dac8ee7 100644 --- a/drivers/hwmon/via-cputemp.c +++ b/drivers/hwmon/via-cputemp.c @@ -270,10 +270,10 @@ static int via_cputemp_down_prep(unsigned int cpu) } static const struct x86_cpu_id __initconst cputemp_ids[] = { - { X86_VENDOR_CENTAUR, 6, 0xa, }, /* C7 A */ - { X86_VENDOR_CENTAUR, 6, 0xd, }, /* C7 D */ - { X86_VENDOR_CENTAUR, 6, 0xf, }, /* Nano */ - { X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, }, + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_A, NULL), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_D, NULL), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_NANO, NULL), + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, cputemp_ids); -- cgit v1.2.3 From 9c51044cbc5fe9bb2c3bf71cb0dbcbd96ed03301 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:58 +0100 Subject: thermal: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local QUARK defines and use the proper ones.
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131509.967017771@linutronix.de --- drivers/thermal/intel/intel_powerclamp.c | 2 +- drivers/thermal/intel/intel_quark_dts_thermal.c | 5 +---- drivers/thermal/intel/intel_soc_dts_thermal.c | 3 +-- drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 53216dcbe173..f74b2473440d 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -651,7 +651,7 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = { }; static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT }, + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c index 5d33b350da1c..d704fc104cfd 100644 --- a/drivers/thermal/intel/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel/intel_quark_dts_thermal.c @@ -64,9 +64,6 @@ #include #include -#define X86_FAMILY_QUARK 0x5 -#define X86_MODEL_QUARK_X1000 0x9 - /* DTS reset is programmed via QRK_MBI_UNIT_SOC */ #define QRK_DTS_REG_OFFSET_RESET 0x34 #define QRK_DTS_RESET_BIT BIT(0) @@ -433,7 +430,7 @@ err_ret: } static const struct x86_cpu_id qrk_thermal_ids[] __initconst = { - { X86_VENDOR_INTEL, X86_FAMILY_QUARK, X86_MODEL_QUARK_X1000 }, + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids); diff --git a/drivers/thermal/intel/intel_soc_dts_thermal.c b/drivers/thermal/intel/intel_soc_dts_thermal.c index f4be9c14e5d9..92e5c19d03f6 100644 --- a/drivers/thermal/intel/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel/intel_soc_dts_thermal.c @@ -36,8 +36,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) } static const struct x86_cpu_id soc_thermal_ids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0, - BYT_SOC_DTS_APIC_IRQ}, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, BYT_SOC_DTS_APIC_IRQ), {} }; MODULE_DEVICE_TABLE(x86cpu, soc_thermal_ids); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index ddb4a973c698..23d9990a97c1 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -478,7 +478,7 @@ static int pkg_thermal_cpu_online(unsigned int cpu) } static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS }, + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); -- cgit v1.2.3 From 20d320c664f9bdedb4a1b0029b539aec8c0a71b1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:59 +0100 Subject: extcon: axp288: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. 
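The before/after shape is the same as in the rest of the series; side by side, as an illustrative sketch (the entry mirrors the axp288 table in the diff below):

        /* old: positional C89 initializer, fields counted off by position */
        static const struct x86_cpu_id old_ids[] = {
                { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT, X86_FEATURE_ANY },
                {}
        };

        /* new: one macro per entry; it expands to named C99 designated
         * initializers and fills in X86_FEATURE_ANY implicitly */
        static const struct x86_cpu_id new_ids[] = {
                X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL),
                {}
        };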
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.075227793@linutronix.de --- drivers/extcon/extcon-axp288.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index a7f216191493..34b7afffac28 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -107,7 +107,7 @@ struct axp288_extcon_info { }; static const struct x86_cpu_id cherry_trail_cpu_ids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT, X86_FEATURE_ANY }, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), {} }; -- cgit v1.2.3 From 4a9f45a0533f47bcff27761821ee568875c5aee4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:00 +0100 Subject: intel_idle: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.193755545@linutronix.de --- drivers/idle/intel_idle.c | 79 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d55606608ac8..88f6f926d967 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1068,51 +1068,48 @@ static const struct idle_cpu idle_cpu_dnv = { }; static const struct x86_cpu_id intel_idle_ids[] __initconst = { - INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx), - INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), - INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx), - INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx), - INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom), - INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft), - INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx), - INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), - INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx), - INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom), - INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier), - INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht), - INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), - INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), - INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx), - INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw), - INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn), - INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx), - INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx), - INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl), - INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl), - INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl), - INTEL_CPU_FAM6(KABYLAKE, idle_cpu_skl), - INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx), - INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl), - INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl), - INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt), - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt), - INTEL_CPU_FAM6(ATOM_GOLDMONT_D, idle_cpu_dnv), - INTEL_CPU_FAM6(ATOM_TREMONT_D, idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), +
X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), {} }; -#define INTEL_CPU_FAM6_MWAIT \ - { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 } - static const struct x86_cpu_id intel_mwait_ids[] __initconst = { - INTEL_CPU_FAM6_MWAIT, + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), {} }; -- cgit v1.2.3 From 1e41eb1524798d6863169ccbcce8bf8f0a63db18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:01 +0100 Subject: mmc: sdhci-acpi: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.285691129@linutronix.de --- drivers/mmc/host/sdhci-acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 9651dca6863e..a439754aa841 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -234,7 +234,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { static bool sdhci_acpi_byt(void) { static const struct x86_cpu_id byt[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL), {} }; @@ -244,7 +244,7 @@ static bool sdhci_acpi_byt(void) static bool sdhci_acpi_cht(void) { static const struct x86_cpu_id cht[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), {} }; -- cgit v1.2.3 From 91e503e6f8af97c7da71b3f1e7b2f8a07a36f2f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:02 +0100 Subject: PCI: intel-mid: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Link: https://lkml.kernel.org/r/20200320131510.393113444@linutronix.de --- drivers/pci/pci-mid.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index 30fbe2ea6eab..aafd58da3a89 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -55,15 +55,13 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = { .need_resume = mid_pci_need_resume, }; -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - /* * This table should be in sync with the one in * arch/x86/platform/intel-mid/pwr.c. */ static const struct x86_cpu_id lpss_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SALTWELL_MID), - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL), {} }; -- cgit v1.2.3 From f07225128865b30093c9ccf946564673c77d0233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:03 +0100 Subject: powercap/intel_rapl: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency.
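Note that such a table serves two purposes: x86_match_cpu() consults it at init time, and MODULE_DEVICE_TABLE(x86cpu, ...) exports it as modalias data so udev can autoload the module on matching CPUs. A minimal sketch of the pattern (table name hypothetical):

        static const struct x86_cpu_id my_ids[] __initconst = {
                X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, NULL),
                {}
        };
        MODULE_DEVICE_TABLE(x86cpu, my_ids);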
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.501728797@linutronix.de --- drivers/powercap/intel_rapl_common.c | 87 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 73257cf107d9..eb328655bc01 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -951,52 +951,51 @@ static const struct rapl_defaults rapl_defaults_cht = { }; static const struct x86_cpu_id rapl_ids[] __initconst = { - INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core), - INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core), - - INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core), - INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core), - - INTEL_CPU_FAM6(HASWELL, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_L, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_G, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server), - - INTEL_CPU_FAM6(BROADWELL, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_G, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_D, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server), - - INTEL_CPU_FAM6(SKYLAKE, rapl_defaults_core), - INTEL_CPU_FAM6(SKYLAKE_L, rapl_defaults_core), - INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(KABYLAKE_L, rapl_defaults_core), - INTEL_CPU_FAM6(KABYLAKE, rapl_defaults_core), - INTEL_CPU_FAM6(CANNONLAKE_L, rapl_defaults_core), - INTEL_CPU_FAM6(ICELAKE_L, rapl_defaults_core), - INTEL_CPU_FAM6(ICELAKE, rapl_defaults_core), - INTEL_CPU_FAM6(ICELAKE_NNPI, rapl_defaults_core), - INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(ICELAKE_D, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(COMETLAKE_L, rapl_defaults_core), - INTEL_CPU_FAM6(COMETLAKE, rapl_defaults_core), - INTEL_CPU_FAM6(TIGERLAKE_L, rapl_defaults_core), - - INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), - INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, rapl_defaults_tng), - INTEL_CPU_FAM6(ATOM_AIRMONT_MID, rapl_defaults_ann), - INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_GOLDMONT_D, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_TREMONT_D, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_TREMONT_L, rapl_defaults_core), - - INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), + + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core), + + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server), + + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server), + + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, 
&rapl_defaults_hsw_server), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), + + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), + + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), {} }; - MODULE_DEVICE_TABLE(x86cpu, rapl_ids); /* Read once for all raw primitive data for domains */ -- cgit v1.2.3 From d51ba9c6663d7171681be357f672503f4e2ccdc1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:04 +0100 Subject: ASoC: Intel: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Get rid of the local macro wrappers for consistency.
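After the conversion, a generated helper expands roughly as sketched below. The body between the two hunks is elided in the diff, so the x86_match_cpu() call is an assumption based on the visible id declaration and return statements:

	/* Approximate expansion of SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT) */
	static inline bool soc_intel_is_byt(void)
	{
		static const struct x86_cpu_id byt_cpu_ids[] = {
			X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL),
			{}
		};
		const struct x86_cpu_id *id;

		/* assumed: compare the running CPU against the one-entry table */
		id = x86_match_cpu(byt_cpu_ids);
		if (id)
			return true;
		return false;
	}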
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.594671507@linutronix.de --- sound/soc/intel/common/soc-intel-quirks.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h index 863a477d3405..a9176150c6ed 100644 --- a/sound/soc/intel/common/soc-intel-quirks.h +++ b/sound/soc/intel/common/soc-intel-quirks.h @@ -15,13 +15,11 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> -#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } - #define SOC_INTEL_IS_CPU(soc, type) \ static inline bool soc_intel_is_##soc(void) \ { \ static const struct x86_cpu_id soc##_cpu_ids[] = { \ - ICPU(type), \ + X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ {} \ }; \ const struct x86_cpu_id *id; \ @@ -32,11 +30,11 @@ static inline bool soc_intel_is_##soc(void) \ return false; \ } -SOC_INTEL_IS_CPU(byt, INTEL_FAM6_ATOM_SILVERMONT); -SOC_INTEL_IS_CPU(cht, INTEL_FAM6_ATOM_AIRMONT); -SOC_INTEL_IS_CPU(apl, INTEL_FAM6_ATOM_GOLDMONT); -SOC_INTEL_IS_CPU(glk, INTEL_FAM6_ATOM_GOLDMONT_PLUS); -SOC_INTEL_IS_CPU(cml, INTEL_FAM6_KABYLAKE_L); +SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); +SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); +SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); +SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); +SOC_INTEL_IS_CPU(cml, KABYLAKE_L); static inline bool soc_intel_is_byt_cr(struct platform_device *pdev) { -- cgit v1.2.3 From f30cfacad1ee948c821a82e63b28958b92bf8c6c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:05 +0100 Subject: crypto: Convert to new CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.700250889@linutronix.de --- arch/x86/crypto/aesni-intel_glue.c | 2 +- arch/x86/crypto/crc32-pclmul_glue.c | 2 +- arch/x86/crypto/crc32c-intel_glue.c | 2 +- arch/x86/crypto/crct10dif-pclmul_glue.c | 2 +- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 +- drivers/crypto/padlock-aes.c | 2 +- drivers/crypto/padlock-sha.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index bbbebbd35b5d..75b6ea20491e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1064,7 +1064,7 @@ static struct aead_alg aesni_aeads[0]; static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; static const struct x86_cpu_id aesni_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_AES), + X86_MATCH_FEATURE(X86_FEATURE_AES, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 418bd88acac8..7c4c7b2fbf05 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -170,7 +170,7 @@ static struct shash_alg alg = { }; static const struct x86_cpu_id crc32pclmul_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index c20d1b8a82c3..d2d069bd459b 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -221,7 +221,7 @@ static struct shash_alg alg = { }; static
const struct x86_cpu_id crc32c_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), + X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 3c81e15b0873..71291d5af9f4 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -114,7 +114,7 @@ static struct shash_alg alg = { }; static const struct x86_cpu_id crct10dif_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index a4b728518e28..1f1a95f3dd0c 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -313,7 +313,7 @@ static struct ahash_alg ghash_async_alg = { }; static const struct x86_cpu_id pcmul_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */ {} }; MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 594d6b1695d5..62c6fe88b212 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -474,7 +474,7 @@ static struct skcipher_alg cbc_aes_alg = { }; static const struct x86_cpu_id padlock_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_XCRYPT), + X86_MATCH_FEATURE(X86_FEATURE_XCRYPT, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index c826abe79e79..a697a4a3f2d0 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -490,7 +490,7 @@ static struct shash_alg sha256_alg_nano = { }; static const struct x86_cpu_id padlock_sha_ids[] = { - X86_FEATURE_MATCH(X86_FEATURE_PHE), + X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids); -- cgit v1.2.3 From 315d01d1ad39f940c5156d6b2653bf89182422a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:06 +0100 Subject: hwrng: via_rng: Convert to new X86 CPU match macros The new macro set has a consistent namespace and uses C99 initializers instead of the grufty C89 ones. 
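Like the other feature-based tables in this series, the converted entry serves two purposes: module autoloading via MODULE_DEVICE_TABLE() and an explicit runtime check. A minimal usage sketch follows; the example_* names and the init-function check are illustrative, not taken from via-rng, whose probe logic is not shown in this patch:

	#include <linux/module.h>
	#include <asm/cpu_device_id.h>

	static const struct x86_cpu_id example_cpu_id[] = {
		/* any vendor, family and model; only the feature bit matters */
		X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL),
		{}
	};
	/* lets udev autoload the module on matching CPUs */
	MODULE_DEVICE_TABLE(x86cpu, example_cpu_id);

	static int __init example_init(void)
	{
		/* refuse to load when the running CPU lacks the feature */
		if (!x86_match_cpu(example_cpu_id))
			return -ENODEV;
		return 0;
	}
	module_init(example_init);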
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.793641638@linutronix.de --- drivers/char/hw_random/via-rng.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index ffe9b0c6c647..39943bc3651a 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -209,20 +209,19 @@ static int __init mod_init(void) out: return err; } +module_init(mod_init); static void __exit mod_exit(void) { hwrng_unregister(&via_rng); } - -module_init(mod_init); module_exit(mod_exit); static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_XSTORE), + X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL), {} }; +MODULE_DEVICE_TABLE(x86cpu, via_rng_cpu_id); MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock"); MODULE_LICENSE("GPL"); -MODULE_DEVICE_TABLE(x86cpu, via_rng_cpu_id); -- cgit v1.2.3 From 1826d56bcef9c38287f7c1a8e3b7778863e0b9d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:14:07 +0100 Subject: x86/cpu: Cleanup the now unused CPU match macros No more users. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131510.900226233@linutronix.de --- arch/x86/include/asm/cpu_device_id.h | 3 --- arch/x86/include/asm/intel-family.h | 13 ------------- 2 files changed, 16 deletions(-) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index f11770fac73a..cf3d621c6892 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -91,9 +91,6 @@ #define X86_MATCH_FEATURE(feature, data) \ X86_MATCH_VENDOR_FEATURE(ANY, feature, data) -/* Transitional to keep the existing code working */ -#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL) - /** * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 236a87aa84d6..8f1e94f29a16 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -124,17 +124,4 @@ /* Family 5 */ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ -/* Useful macros */ -#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ -{ \ - .vendor = X86_VENDOR_INTEL, \ - .family = _family, \ - .model = _model, \ - .feature = X86_FEATURE_ANY, \ - .driver_data = (kernel_ulong_t)&_driver_data \ -} - -#define INTEL_CPU_FAM6(_model, _driver_data) \ - INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data) - #endif /* _ASM_X86_INTEL_FAMILY_H */ -- cgit v1.2.3 From d97828072d0bcecb6655f0966efc38a2647d3dfb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2020 13:21:55 +0100 Subject: cpufreq/intel_pstate: Fix wrong macro conversion The feature flag that hwp_support_ids is supposed to match on is X86_FEATURE_HWP, not X86_FEATURE_APERFMPERF. Fix it. [ bp: Write commit message.
] Fixes: b11d77fa300d ("cpufreq: Convert to new X86 CPU match macros") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200324060124.GC11705@shao2-debian --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 780c387f05c0..46bce09e5aff 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2727,7 +2727,7 @@ static inline void intel_pstate_request_control_from_smm(void) {} #define X86_MATCH_HWP(model, hwp_mode) \ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ - X86_FEATURE_APERFMPERF, hwp_mode) + X86_FEATURE_HWP, hwp_mode) static const struct x86_cpu_id hwp_support_ids[] __initconst = { X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), -- cgit v1.2.3
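For context, tables like hwp_support_ids carry a per-model value in driver_data, which is why the wrong feature flag here breaks HWP detection rather than just autoloading. A consumer retrieves the value roughly as sketched below; the function and helper names are hypothetical, and intel_pstate's actual init path is not shown in this hunk:

	static int __init example_init(void)
	{
		const struct x86_cpu_id *id;

		id = x86_match_cpu(hwp_support_ids);
		if (!id)
			return -ENODEV;	/* no HWP-capable CPU matched */

		/* driver_data holds the per-model value, e.g. the HWP mode */
		use_hwp_mode(id->driver_data);	/* hypothetical helper */
		return 0;
	}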