Diffstat (limited to 'arch/powerpc/perf')
 arch/powerpc/perf/core-book3s.c   | 39
 arch/powerpc/perf/imc-pmu.c       |  6
 arch/powerpc/perf/isa207-common.c | 58
 arch/powerpc/perf/isa207-common.h |  9
 arch/powerpc/perf/perf_regs.c     |  7
 arch/powerpc/perf/power9-pmu.c    | 22
 6 files changed, 97 insertions(+), 44 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 81f8a0c838ae..b0723002a396 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -10,6 +10,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched/clock.h>
 #include <linux/perf_event.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
@@ -130,6 +131,14 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
+bool is_sier_available(void)
+{
+	if (ppmu->flags & PPMU_HAS_SIER)
+		return true;
+
+	return false;
+}
+
 static bool regs_use_siar(struct pt_regs *regs)
 {
 	/*
@@ -864,6 +873,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 	int i, j;
 	unsigned long addf = ppmu->add_fields;
 	unsigned long tadd = ppmu->test_adder;
+	unsigned long grp_mask = ppmu->group_constraint_mask;
+	unsigned long grp_val = ppmu->group_constraint_val;
 
 	if (n_ev > ppmu->n_counter)
 		return -1;
@@ -884,15 +895,23 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 		for (i = 0; i < n_ev; ++i) {
 			nv = (value | cpuhw->avalues[i][0]) +
 				(value & cpuhw->avalues[i][0] & addf);
-			if ((((nv + tadd) ^ value) & mask) != 0 ||
-			    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
-			     cpuhw->amasks[i][0]) != 0)
+
+			if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+				break;
+
+			if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+				& (~grp_mask)) != 0)
 				break;
+
 			value = nv;
 			mask |= cpuhw->amasks[i][0];
 		}
-		if (i == n_ev)
-			return 0;	/* all OK */
+		if (i == n_ev) {
+			if ((value & mask & grp_mask) != (mask & grp_val))
+				return -1;
+			else
+				return 0;	/* all OK */
+		}
 
 		/* doesn't work, gather alternatives... */
 		if (!ppmu->get_alternatives)
@@ -2148,7 +2167,7 @@ static bool pmc_overflow(unsigned long val)
 /*
  * Performance monitor interrupt stuff
  */
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
 {
 	int i, j;
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
@@ -2232,6 +2251,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		irq_exit();
 }
 
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+	u64 start_clock = sched_clock();
+
+	__perf_event_interrupt(regs);
+	perf_sample_event_took(sched_clock() - start_clock);
+}
+
 static int power_pmu_prepare_cpu(unsigned int cpu)
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 6954636b16d1..f292a3f284f1 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -28,13 +28,13 @@ static DEFINE_MUTEX(nest_init_lock);
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
 static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
 
 /* Core IMC data structures and variables */
 
 static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
 static struct imc_pmu *core_imc_pmu;
 
 /* Thread IMC data structures and variables */
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
 static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct imc_pmu, pmu);
 }
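The power_check_constraints() hunk above works on per-event constraint bitfields: each event contributes a (mask, value) pair, and two events may share the PMU only if they agree on overlapping constraint bits. The new group_constraint_mask bits are excluded from that per-event test and judged once, after the whole group has been merged. The following is a minimal user-space model of that flow, not kernel code: it omits the add_fields/test_adder counter arithmetic, and every name and value in it is invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ev { uint64_t mask, value; };	/* per-event constraint bits */

static bool group_fits(const struct ev *e, int n,
		       uint64_t grp_mask, uint64_t grp_val)
{
	uint64_t value = 0, mask = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* Outside grp_mask, events whose constraint fields
		 * overlap must agree on the overlapping bits. */
		if ((value ^ e[i].value) & (mask & e[i].mask) & ~grp_mask)
			return false;
		value |= e[i].value;
		mask |= e[i].mask;
	}
	/* The group bits are judged once, for the whole group. */
	return (value & mask & grp_mask) == (mask & grp_val);
}

int main(void)
{
	/* Invented constraints: both events claim the same field
	 * (bits 0-3) with different values, so they cannot be
	 * scheduled together. */
	struct ev e[] = { { 0xf, 0x1 }, { 0xf, 0x2 } };

	printf("fits: %d\n", group_fits(e, 2, 0, 0));
	return 0;
}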
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 177de814286f..a6c24d866b2f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -148,6 +148,14 @@ static bool is_thresh_cmp_valid(u64 event)
 	return true;
 }
 
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+	unsigned int cache;
+
+	cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
+	return cache;
+}
+
 static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 {
 	u64 ret = PERF_MEM_NA;
@@ -226,8 +234,13 @@ void isa207_get_mem_weight(u64 *weight)
 	u64 mmcra = mfspr(SPRN_MMCRA);
 	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
 	u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+	u64 sier = mfspr(SPRN_SIER);
+	u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
 
-	*weight = mantissa << (2 * exp);
+	if (val == 0 || val == 7)
+		*weight = 0;
+	else
+		*weight = mantissa << (2 * exp);
 }
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
@@ -274,19 +287,27 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 	}
 
 	if (unit >= 6 && unit <= 9) {
-		/*
-		 * L2/L3 events contain a cache selector field, which is
-		 * supposed to be programmed into MMCRC. However MMCRC is only
-		 * HV writable, and there is no API for guest kernels to modify
-		 * it. The solution is for the hypervisor to initialise the
-		 * field to zeroes, and for us to only ever allow events that
-		 * have a cache selector of zero. The bank selector (bit 3) is
-		 * irrelevant, as long as the rest of the value is 0.
-		 */
-		if (cache & 0x7)
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			mask |= CNST_CACHE_GROUP_MASK;
+			value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+			mask |= CNST_CACHE_PMC4_MASK;
+			if (pmc == 4)
+				value |= CNST_CACHE_PMC4_VAL;
+		} else if (cache & 0x7) {
+			/*
+			 * L2/L3 events contain a cache selector field, which is
+			 * supposed to be programmed into MMCRC. However MMCRC is only
+			 * HV writable, and there is no API for guest kernels to modify
+			 * it. The solution is for the hypervisor to initialise the
+			 * field to zeroes, and for us to only ever allow events that
+			 * have a cache selector of zero. The bank selector (bit 3) is
+			 * irrelevant, as long as the rest of the value is 0.
+			 */
 			return -1;
+		}
 
-	} else if (event & EVENT_IS_L1) {
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
 		mask |= CNST_L1_QUAL_MASK;
 		value |= CNST_L1_QUAL_VAL(cache);
 	}
@@ -389,11 +410,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 		/* In continuous sampling mode, update SDAR on TLB miss */
 		mmcra_sdar_mode(event[i], &mmcra);
 
-		if (event[i] & EVENT_IS_L1) {
-			cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
-			mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
-			cache >>= 1;
-			mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			cache = dc_ic_rld_quad_l1_sel(event[i]);
+			mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+		} else {
+			if (event[i] & EVENT_IS_L1) {
+				cache = dc_ic_rld_quad_l1_sel(event[i]);
+				mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+			}
 		}
 
 		if (is_event_marked(event[i])) {
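isa207_get_mem_weight() above now cross-checks SIER: when the sampled type field is 0 or 7 the patch treats the threshold counter as holding nothing useful, so the reported weight becomes 0 instead of a garbage value. The decode itself stores the counter as a mantissa plus a base-4 exponent, i.e. mantissa * 4^exp. A stand-alone restatement of just that arithmetic, with made-up inputs:

#include <stdint.h>
#include <stdio.h>

/* Illustrative-only restatement of the decode above: types 0 and 7
 * yield no usable weight; otherwise the counter is mantissa * 4^exp. */
static uint64_t mem_weight(uint64_t mantissa, uint64_t exp,
			   uint64_t sier_type)
{
	if (sier_type == 0 || sier_type == 7)
		return 0;
	return mantissa << (2 * exp);	/* mantissa * 4^exp */
}

int main(void)
{
	/* 100 * 4^2 = 1600 */
	printf("%llu\n", (unsigned long long)mem_weight(100, 2, 1));
	return 0;
}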
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 0028f4b9490d..91350f42a662 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -134,6 +134,11 @@
 #define CNST_SAMPLE_VAL(v)	(((v) & EVENT_SAMPLE_MASK) << 16)
 #define CNST_SAMPLE_MASK	CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
 
+#define CNST_CACHE_GROUP_VAL(v)	(((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK	CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL	(1ull << 54)
+#define CNST_CACHE_PMC4_MASK	CNST_CACHE_PMC4_VAL
+
 /*
  * For NC we are counting up to 4 events. This requires three bits, and we need
  * the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -163,8 +168,8 @@
 #define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))
 #define MMCR1_PMCSEL_SHIFT(pmc)	(24 - (((pmc) - 1)) * 8)
 #define MMCR1_FAB_SHIFT			36
-#define MMCR1_DC_QUAL_SHIFT		47
-#define MMCR1_IC_QUAL_SHIFT		46
+#define MMCR1_DC_IC_QUAL_MASK		0x3
+#define MMCR1_DC_IC_QUAL_SHIFT		46
 
 /* MMCR1 Combine bits macro for power9 */
 #define p9_MMCR1_COMBINE_SHIFT(pmc)	(38 - ((pmc - 1) * 2))
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..5c36b3a8d47a 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
 	PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
 	PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
 	PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
 };
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +77,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 	if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
 		return 0;
 
+	if (idx == PERF_REG_POWERPC_SIER &&
+	   (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+	    IS_ENABLED(CONFIG_PPC32) ||
+	    !is_sier_available()))
+		return 0;
+
 	return regs_get_register(regs, pt_regs_offset[idx]);
 }
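With PERF_REG_POWERPC_SIER wired into perf_regs.c above, userspace can request SIER in each sample through the generic PERF_SAMPLE_REGS_INTR interface. A sketch of the event setup only, assuming a kernel with this patch and its matching uapi header; ring-buffer parsing and error handling are omitted:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <asm/perf_regs.h>	/* PERF_REG_POWERPC_SIER */

/* Open a sampling event that captures SIER at interrupt time. */
static int open_cycles_with_sier(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
	attr.sample_regs_intr = 1ULL << PERF_REG_POWERPC_SIER;

	return syscall(__NR_perf_event_open, &attr,
		       0 /* this thread */, -1 /* any cpu */,
		       -1 /* no group */, 0);
}

On kernels without the patch (or on PPC32/FSL embedded, per the perf_reg_value() check above), the sampled SIER value simply reads back as 0.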
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index e012b1030a5b..0ff9c43733e9 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -63,16 +63,8 @@
  *	MMCRA[9:11] = thresh_cmp[0:2]
  *	MMCRA[12:18] = thresh_cmp[3:9]
  *
- * if unit == 6 or unit == 7
- *	MMCRC[53:55] = cache_sel[1:3]      (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- *	if cache_sel[0] == 0: # L3 bank
- *		MMCRC[47:49] = cache_sel[1:3]  (L3EVENT_SEL0)
- *	else if cache_sel[0] == 1:
- *		MMCRC[50:51] = cache_sel[2:3]  (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- *	MMCR1[16] = cache_sel[2]
- *	MMCR1[17] = cache_sel[3]
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
 *
 * if mark:
 *	MMCRA[63] = 1		(SAMPLE_ENABLE)
@@ -179,8 +171,6 @@ CACHE_EVENT_ATTR(L1-icache-prefetches,	PM_IC_PREF_WRITE);
 CACHE_EVENT_ATTR(LLC-load-misses,	PM_DATA_FROM_L3MISS);
 CACHE_EVENT_ATTR(LLC-loads,		PM_DATA_FROM_L3);
 CACHE_EVENT_ATTR(LLC-prefetches,	PM_L3_PREF_ALL);
-CACHE_EVENT_ATTR(LLC-store-misses,	PM_L2_ST_MISS);
-CACHE_EVENT_ATTR(LLC-stores,		PM_L2_ST);
 CACHE_EVENT_ATTR(branch-load-misses,	PM_BR_MPRED_CMPL);
 CACHE_EVENT_ATTR(branch-loads,		PM_BR_CMPL);
 CACHE_EVENT_ATTR(dTLB-load-misses,	PM_DTLB_MISS);
@@ -205,8 +195,6 @@ static struct attribute *power9_events_attr[] = {
 	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
 	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
 	CACHE_EVENT_PTR(PM_L3_PREF_ALL),
-	CACHE_EVENT_PTR(PM_L2_ST_MISS),
-	CACHE_EVENT_PTR(PM_L2_ST),
 	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
 	CACHE_EVENT_PTR(PM_BR_CMPL),
 	CACHE_EVENT_PTR(PM_DTLB_MISS),
@@ -354,8 +342,8 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
 		},
 		[ C(OP_WRITE) ] = {
-			[ C(RESULT_ACCESS) ] = PM_L2_ST,
-			[ C(RESULT_MISS)   ] = PM_L2_ST_MISS,
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
 		},
 		[ C(OP_PREFETCH) ] = {
 			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
@@ -427,6 +415,8 @@ static struct power_pmu power9_pmu = {
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
 	.test_adder		= ISA207_TEST_ADDER,
+	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
+	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
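The two new power_pmu fields at the end tie power9 into the generic check sketched earlier: every L2/L3 event sets bit 54 in its constraint mask, but only an event placed on PMC4 sets it in the accumulated value, so a group of L2/L3 bus events schedules only if one member runs on PMC4; bits 55-62 carry the low event-code byte, so all L2/L3 events in one group must share it. A small demonstration of those two rules, with invented event codes (the macros mirror the CNST_CACHE_* definitions above, for illustration only):

#include <stdint.h>
#include <stdio.h>

#define CACHE_GROUP_VAL(v)	(((uint64_t)(v) & 0xffull) << 55)
#define CACHE_GROUP_MASK	CACHE_GROUP_VAL(0xff)
#define CACHE_PMC4_VAL		(1ull << 54)
#define CACHE_PMC4_MASK		CACHE_PMC4_VAL

int main(void)
{
	uint64_t a = CACHE_GROUP_VAL(0x52);	/* invented event code */
	uint64_t b = CACHE_GROUP_VAL(0x64);	/* a different one */

	/* Different event codes disagree on the group byte, so the
	 * two events can never be scheduled in the same group. */
	printf("codes clash: %s\n",
	       ((a ^ b) & CACHE_GROUP_MASK) ? "yes" : "no");

	/* The PMC4 rule: bit 54 accumulates into the mask from every
	 * L2/L3 event, but into the value only from the event on PMC4.
	 * Here no event sits on PMC4, so the group check fails. */
	uint64_t mask = CACHE_PMC4_MASK, value = 0;
	printf("pmc4 rule met: %s\n",
	       (value & mask & CACHE_PMC4_MASK) == (mask & CACHE_PMC4_VAL)
	       ? "yes" : "no");
	return 0;
}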