summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 19:43:24 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 19:43:24 +0100
commit8d6973327ee84c2f40dd9efd8928d4a1186c96e2 (patch)
tree1c6accd71b6e9c4e05d5aaae766b958ad440d320 /arch/powerpc/perf
parentsched/fair: Fix warning on non-SMP build (diff)
parentMerge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/scottw... (diff)
downloadlinux-8d6973327ee84c2f40dd9efd8928d4a1186c96e2.tar.xz
linux-8d6973327ee84c2f40dd9efd8928d4a1186c96e2.zip
Merge tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Notable changes: - Mitigations for Spectre v2 on some Freescale (NXP) CPUs. - A large series adding support for pass-through of Nvidia V100 GPUs to guests on Power9. - Another large series to enable hardware assistance for TLB table walk on MPC8xx CPUs. - Some preparatory changes to our DMA code, to make way for further cleanups from Christoph. - Several fixes for our Transactional Memory handling discovered by fuzzing the signal return path. - Support for generating our system call table(s) from a text file like other architectures. - A fix to our page fault handler so that instead of generating a WARN_ON_ONCE, user accesses of kernel addresses instead print a ratelimited and appropriately scary warning. - A cosmetic change to make our unhandled page fault messages more similar to other arches and also more compact and informative. - Freescale updates from Scott: "Highlights include elimination of legacy clock bindings use from dts files, an 83xx watchdog handler, fixes to old dts interrupt errors, and some minor cleanup." And many clean-ups, reworks and minor fixes etc. Thanks to: Alexandre Belloni, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Arnd Bergmann, Benjamin Herrenschmidt, Breno Leitao, Christian Lamparter, Christophe Leroy, Christoph Hellwig, Daniel Axtens, Darren Stevens, David Gibson, Diana Craciun, Dmitry V. Levin, Firoz Khan, Geert Uytterhoeven, Greg Kurz, Gustavo Romero, Hari Bathini, Joel Stanley, Kees Cook, Madhavan Srinivasan, Mahesh Salgaonkar, Markus Elfring, Mathieu Malaterre, Michal Suchánek, Naveen N. Rao, Nick Desaulniers, Oliver O'Halloran, Paul Mackerras, Ram Pai, Ravi Bangoria, Rob Herring, Russell Currey, Sabyasachi Gupta, Sam Bobroff, Satheesh Rajendran, Scott Wood, Segher Boessenkool, Stephen Rothwell, Tang Yuantian, Thiago Jung Bauermann, Yangtao Li, Yuantian Tang, Yue Haibing" * tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (201 commits) Revert "powerpc/fsl_pci: simplify fsl_pci_dma_set_mask" powerpc/zImage: Also check for stdout-path powerpc: Fix HMIs on big-endian with CONFIG_RELOCATABLE=y macintosh: Use of_node_name_{eq, prefix} for node name comparisons ide: Use of_node_name_eq for node name comparisons powerpc: Use of_node_name_eq for node name comparisons powerpc/pseries/pmem: Convert to %pOFn instead of device_node.name powerpc/mm: Remove very old comment in hash-4k.h powerpc/pseries: Fix node leak in update_lmb_associativity_index() powerpc/configs/85xx: Enable CONFIG_DEBUG_KERNEL powerpc/dts/fsl: Fix dtc-flagged interrupt errors clk: qoriq: add more compatibles strings powerpc/fsl: Use new clockgen binding powerpc/83xx: handle machine check caused by watchdog timer powerpc/fsl-rio: fix spelling mistake "reserverd" -> "reserved" powerpc/fsl_pci: simplify fsl_pci_dma_set_mask arch/powerpc/fsl_rmu: Use dma_zalloc_coherent vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver vfio_pci: Allow regions to add own capabilities vfio_pci: Allow mapping extra regions ...
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--arch/powerpc/perf/core-book3s.c39
-rw-r--r--arch/powerpc/perf/imc-pmu.c6
-rw-r--r--arch/powerpc/perf/isa207-common.c58
-rw-r--r--arch/powerpc/perf/isa207-common.h9
-rw-r--r--arch/powerpc/perf/perf_regs.c7
-rw-r--r--arch/powerpc/perf/power9-pmu.c22
6 files changed, 97 insertions, 44 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 81f8a0c838ae..b0723002a396 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
@@ -130,6 +131,14 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
+bool is_sier_available(void)
+{
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return true;
+
+ return false;
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -864,6 +873,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
int i, j;
unsigned long addf = ppmu->add_fields;
unsigned long tadd = ppmu->test_adder;
+ unsigned long grp_mask = ppmu->group_constraint_mask;
+ unsigned long grp_val = ppmu->group_constraint_val;
if (n_ev > ppmu->n_counter)
return -1;
@@ -884,15 +895,23 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (i = 0; i < n_ev; ++i) {
nv = (value | cpuhw->avalues[i][0]) +
(value & cpuhw->avalues[i][0] & addf);
- if ((((nv + tadd) ^ value) & mask) != 0 ||
- (((nv + tadd) ^ cpuhw->avalues[i][0]) &
- cpuhw->amasks[i][0]) != 0)
+
+ if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+ break;
+
+ if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+ & (~grp_mask)) != 0)
break;
+
value = nv;
mask |= cpuhw->amasks[i][0];
}
- if (i == n_ev)
- return 0; /* all OK */
+ if (i == n_ev) {
+ if ((value & mask & grp_mask) != (mask & grp_val))
+ return -1;
+ else
+ return 0; /* all OK */
+ }
/* doesn't work, gather alternatives... */
if (!ppmu->get_alternatives)
@@ -2148,7 +2167,7 @@ static bool pmc_overflow(unsigned long val)
/*
* Performance monitor interrupt stuff
*/
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
{
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
@@ -2232,6 +2251,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit();
}
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+ u64 start_clock = sched_clock();
+
+ __perf_event_interrupt(regs);
+ perf_sample_event_took(sched_clock() - start_clock);
+}
+
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 6954636b16d1..f292a3f284f1 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -28,13 +28,13 @@ static DEFINE_MUTEX(nest_init_lock);
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
static int nest_pmus;
/* Core IMC data structures and variables */
static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 177de814286f..a6c24d866b2f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -148,6 +148,14 @@ static bool is_thresh_cmp_valid(u64 event)
return true;
}
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+ unsigned int cache;
+
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
+ return cache;
+}
+
static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
{
u64 ret = PERF_MEM_NA;
@@ -226,8 +234,13 @@ void isa207_get_mem_weight(u64 *weight)
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+ u64 sier = mfspr(SPRN_SIER);
+ u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- *weight = mantissa << (2 * exp);
+ if (val == 0 || val == 7)
+ *weight = 0;
+ else
+ *weight = mantissa << (2 * exp);
}
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
@@ -274,19 +287,27 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- /*
- * L2/L3 events contain a cache selector field, which is
- * supposed to be programmed into MMCRC. However MMCRC is only
- * HV writable, and there is no API for guest kernels to modify
- * it. The solution is for the hypervisor to initialise the
- * field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero. The bank selector (bit 3) is
- * irrelevant, as long as the rest of the value is 0.
- */
- if (cache & 0x7)
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ mask |= CNST_CACHE_GROUP_MASK;
+ value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+ mask |= CNST_CACHE_PMC4_MASK;
+ if (pmc == 4)
+ value |= CNST_CACHE_PMC4_VAL;
+ } else if (cache & 0x7) {
+ /*
+ * L2/L3 events contain a cache selector field, which is
+ * supposed to be programmed into MMCRC. However MMCRC is only
+ * HV writable, and there is no API for guest kernels to modify
+ * it. The solution is for the hypervisor to initialise the
+ * field to zeroes, and for us to only ever allow events that
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
+ */
return -1;
+ }
- } else if (event & EVENT_IS_L1) {
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
mask |= CNST_L1_QUAL_MASK;
value |= CNST_L1_QUAL_VAL(cache);
}
@@ -389,11 +410,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
/* In continuous sampling mode, update SDAR on TLB miss */
mmcra_sdar_mode(event[i], &mmcra);
- if (event[i] & EVENT_IS_L1) {
- cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
- mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
- cache >>= 1;
- mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ } else {
+ if (event[i] & EVENT_IS_L1) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ }
}
if (is_event_marked(event[i])) {
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 0028f4b9490d..91350f42a662 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -134,6 +134,11 @@
#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+#define CNST_CACHE_GROUP_VAL(v) (((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL (1ull << 54)
+#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -163,8 +168,8 @@
#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8)
#define MMCR1_FAB_SHIFT 36
-#define MMCR1_DC_QUAL_SHIFT 47
-#define MMCR1_IC_QUAL_SHIFT 46
+#define MMCR1_DC_IC_QUAL_MASK 0x3
+#define MMCR1_DC_IC_QUAL_SHIFT 46
/* MMCR1 Combine bits macro for power9 */
#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2))
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..5c36b3a8d47a 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +77,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
+ if (idx == PERF_REG_POWERPC_SIER &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32) ||
+ !is_sier_available()))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index e012b1030a5b..0ff9c43733e9 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -63,16 +63,8 @@
* MMCRA[9:11] = thresh_cmp[0:2]
* MMCRA[12:18] = thresh_cmp[3:9]
*
- * if unit == 6 or unit == 7
- * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- * if cache_sel[0] == 0: # L3 bank
- * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0)
- * else if cache_sel[0] == 1:
- * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- * MMCR1[16] = cache_sel[2]
- * MMCR1[17] = cache_sel[3]
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
*
* if mark:
* MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -179,8 +171,6 @@ CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
-CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
-CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
@@ -205,8 +195,6 @@ static struct attribute *power9_events_attr[] = {
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
CACHE_EVENT_PTR(PM_L3_PREF_ALL),
- CACHE_EVENT_PTR(PM_L2_ST_MISS),
- CACHE_EVENT_PTR(PM_L2_ST),
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
CACHE_EVENT_PTR(PM_BR_CMPL),
CACHE_EVENT_PTR(PM_DTLB_MISS),
@@ -354,8 +342,8 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = PM_L2_ST,
- [ C(RESULT_MISS) ] = PM_L2_ST_MISS,
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
@@ -427,6 +415,8 @@ static struct power_pmu power9_pmu = {
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,