From 7755cec63adeecea3cbbf4032047812c37cf7cc3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:20 -0400 Subject: arm64: perf: Move PMUv3 driver to drivers/perf Having the ARM PMUv3 driver sitting in arch/arm64/kernel is getting in the way of being able to use perf on ARMv8 cores running a 32bit kernel, such as 32bit KVM guests. This patch moves it into drivers/perf/arm_pmuv3.c, with an include file in include/linux/perf/arm_pmuv3.h. The only thing left in arch/arm64 is some mundane perf stuff. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-2-zalbassam@google.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 11 + drivers/perf/Makefile | 1 + drivers/perf/arm_pmuv3.c | 1468 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1480 insertions(+) create mode 100644 drivers/perf/arm_pmuv3.c (limited to 'drivers/perf') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 66c259000a44..defe6b47854b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -100,6 +100,17 @@ config ARM_SMMU_V3_PMU through the SMMU and allow the resulting information to be filtered based on the Stream ID of the corresponding master. +config ARM_PMUV3 + depends on HW_PERF_EVENTS && ARM64 + bool "ARM PMUv3 support" if !ARM64 + default y + help + Say y if you want to use the ARM performance monitor unit (PMU) + version 3. The PMUv3 is the CPU performance monitors on ARMv8 + (aarch32 and aarch64) systems that implement the PMUv3 + architecture. + Currently, PMUv3 is only supported on aarch64 (arm64) + config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" depends on ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 13e45da61100..dabc859540ce 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o +obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c new file mode 100644 index 000000000000..c25203f8b940 --- /dev/null +++ b/drivers/perf/arm_pmuv3.c @@ -0,0 +1,1468 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARMv8 PMUv3 Performance Events handling code. + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon + * + * This code is based heavily on the ARMv7 perf event code. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ARMv8 Cortex-A53 specific event types. */ +#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 + +/* ARMv8 Cavium ThunderX specific event types. */ +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9 +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED + +/* + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Unsupported events will + * be disabled at run-time based on the PMCEID registers. + */ +static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, +}; + +static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, +}; + +static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, +}; + +static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, + + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, +}; + +static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static ssize_t +armv8pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +#define ARMV8_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) + +static struct attribute *armv8_pmuv3_event_attrs[] = { + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED), + ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC), + ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL), + ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND), + ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND), + ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), + ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES), + ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM), + ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), + ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), + ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), + ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), + ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), + ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), + ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), + ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), + ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), + ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), + ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), + ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), + ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), + ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED), + ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD), + ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR), + NULL, +}; + +static umode_t +armv8pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + return attr->mode; + + if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { + u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + + if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(id, cpu_pmu->pmceid_ext_bitmap)) + return attr->mode; + } + + return 0; +} + +static const struct attribute_group armv8_pmuv3_events_attr_group = { + .name = "events", + .attrs = armv8_pmuv3_event_attrs, + .is_visible = armv8pmu_event_attr_is_visible, +}; + +PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); +PMU_FORMAT_ATTR(rdpmc, "config1:1"); + +static int sysctl_perf_user_access __read_mostly; + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} + +static inline bool armv8pmu_event_want_user_access(struct perf_event *event) +{ + return event->attr.config1 & 0x2; +} + +static struct attribute *armv8_pmuv3_format_attrs[] = { + &format_attr_event.attr, + &format_attr_long.attr, + &format_attr_rdpmc.attr, + NULL, +}; + +static const struct attribute_group armv8_pmuv3_format_attr_group = { + .name = "format", + .attrs = armv8_pmuv3_format_attrs, +}; + +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return sysfs_emit(page, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT) + & ARMV8_PMU_BUS_SLOTS_MASK; + + return sysfs_emit(page, "0x%08x\n", bus_slots); +} + +static DEVICE_ATTR_RO(bus_slots); + +static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT) + & ARMV8_PMU_BUS_WIDTH_MASK; + u32 val = 0; + + /* Encoded as Log2(number of bytes), plus one */ + if (bus_width > 2 && bus_width < 13) + val = 1 << (bus_width - 1); + + return sysfs_emit(page, "0x%08x\n", val); +} + +static DEVICE_ATTR_RO(bus_width); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + &dev_attr_bus_slots.attr, + &dev_attr_bus_width.attr, + NULL, +}; + +static const struct attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + +/* + * Perf Events' indices + */ +#define ARMV8_IDX_CYCLE_COUNTER 0 +#define ARMV8_IDX_COUNTER0 1 +#define ARMV8_IDX_CYCLE_COUNTER_USER 32 + +/* + * We unconditionally enable ARMv8.5-PMU long event counter support + * (64-bit events) where supported. Indicate if this arm_pmu has long + * event counter support. + */ +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) +{ + return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); +} + +static inline bool armv8pmu_event_has_user_read(struct perf_event *event) +{ + return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; +} + +/* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event) or when user space counter access is enabled. + */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + return !armv8pmu_event_has_user_read(event) && + armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + +/* + * ARMv8 low level PMU access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV8_IDX_TO_COUNTER(x) \ + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) + +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline u32 armv8pmu_pmcr_read(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void armv8pmu_pmcr_write(u32 val) +{ + val &= ARMV8_PMU_PMCR_MASK; + isb(); + write_sysreg(val, pmcr_el0); +} + +static inline int armv8pmu_has_overflowed(u32 pmovsr) +{ + return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; +} + +static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +{ + return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); +} + +static inline u64 armv8pmu_read_evcntr(int idx) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + return read_pmevcntrn(counter); +} + +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = armv8pmu_read_evcntr(idx); + + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; +} + +/* + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP + * is set the event counters also become 64-bit counters. Unless the + * user has requested a long counter (attr.config1) then we want to + * interrupt upon 32-bit overflow - we achieve this by applying a bias. + */ +static bool armv8pmu_event_needs_bias(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_64bit(event)) + return false; + + if (armv8pmu_has_long_event(cpu_pmu) || + idx == ARMV8_IDX_CYCLE_COUNTER) + return true; + + return false; +} + +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value |= GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value &= ~GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 value; + + if (idx == ARMV8_IDX_CYCLE_COUNTER) + value = read_sysreg(pmccntr_el0); + else + value = armv8pmu_read_hw_counter(event); + + return armv8pmu_unbias_long_counter(event, value); +} + +static inline void armv8pmu_write_evcntr(int idx, u64 value) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); +} + +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + +static void armv8pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + value = armv8pmu_bias_long_counter(event, value); + + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(value, pmccntr_el0); + else + armv8pmu_write_hw_counter(event, value); +} + +static inline void armv8pmu_write_evtype(int idx, u32 val) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + val &= ARMV8_PMU_EVTYPE_MASK; + write_pmevtypern(counter, val); +} + +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. + */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); + } +} + +static u32 armv8pmu_event_cnten_mask(struct perf_event *event) +{ + int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + u32 mask = BIT(counter); + + if (armv8pmu_event_is_chained(event)) + mask |= BIT(counter - 1); + return mask; +} + +static inline void armv8pmu_enable_counter(u32 mask) +{ + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); + write_sysreg(mask, pmcntenset_el0); +} + +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u32 mask = armv8pmu_event_cnten_mask(event); + + kvm_set_pmu_events(mask, attr); + + /* We rely on the hypervisor switch code to enable guest counters */ + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_enable_counter(mask); +} + +static inline void armv8pmu_disable_counter(u32 mask) +{ + write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); +} + +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u32 mask = armv8pmu_event_cnten_mask(event); + + kvm_clr_pmu_events(mask); + + /* We rely on the hypervisor switch code to disable guest counters */ + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_disable_counter(mask); +} + +static inline void armv8pmu_enable_intens(u32 mask) +{ + write_sysreg(mask, pmintenset_el1); +} + +static inline void armv8pmu_enable_event_irq(struct perf_event *event) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_enable_intens(BIT(counter)); +} + +static inline void armv8pmu_disable_intens(u32 mask) +{ + write_sysreg(mask, pmintenclr_el1); + isb(); + /* Clear the overflow flag in case an interrupt is pending. */ + write_sysreg(mask, pmovsclr_el0); + isb(); +} + +static inline void armv8pmu_disable_event_irq(struct perf_event *event) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_disable_intens(BIT(counter)); +} + +static inline u32 armv8pmu_getreset_flags(void) +{ + u32 value; + + /* Read */ + value = read_sysreg(pmovsclr_el0); + + /* Write to clear flags */ + value &= ARMV8_PMU_OVSR_MASK; + write_sysreg(value, pmovsclr_el0); + + return value; +} + +static void armv8pmu_disable_user_access(void) +{ + write_sysreg(0, pmuserenr_el0); +} + +static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) +{ + int i; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + + /* Clear any unused counters to avoid leaking their contents */ + for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { + if (i == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(0, pmccntr_el0); + else + armv8pmu_write_evcntr(i, 0); + } + + write_sysreg(0, pmuserenr_el0); + write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); +} + +static void armv8pmu_enable_event(struct perf_event *event) +{ + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* + * Disable counter + */ + armv8pmu_disable_event_counter(event); + + /* + * Set event. + */ + armv8pmu_write_event_type(event); + + /* + * Enable interrupt for this counter + */ + armv8pmu_enable_event_irq(event); + + /* + * Enable counter + */ + armv8pmu_enable_event_counter(event); +} + +static void armv8pmu_disable_event(struct perf_event *event) +{ + /* + * Disable counter + */ + armv8pmu_disable_event_counter(event); + + /* + * Disable interrupt for this counter + */ + armv8pmu_disable_event_irq(event); +} + +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + struct perf_event_context *ctx; + int nr_user = 0; + + ctx = perf_cpu_task_ctx(); + if (ctx) + nr_user = ctx->nr_user; + + if (sysctl_perf_user_access && nr_user) + armv8pmu_enable_user_access(cpu_pmu); + else + armv8pmu_disable_user_access(); + + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); +} + +static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + u32 pmovsr; + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmovsr = armv8pmu_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv8pmu_has_overflowed(pmovsr)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + armv8pmu_start(cpu_pmu); + + return IRQ_HANDLED; +} + +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; +} + +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; +} + +static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; + + /* Always prefer to place a cycle counter into the cycle counter. */ + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; + else if (armv8pmu_event_is_64bit(event) && + armv8pmu_event_want_user_access(event) && + !armv8pmu_has_long_event(cpu_pmu)) + return -EAGAIN; + } + + /* + * Otherwise use events counters + */ + if (armv8pmu_event_is_chained(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); +} + +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); +} + +static int armv8pmu_user_event_idx(struct perf_event *event) +{ + if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) + return 0; + + /* + * We remap the cycle counter index to 32 to + * match the offset applied to the rest of + * the counter indices. + */ + if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) + return ARMV8_IDX_CYCLE_COUNTER_USER; + + return event->hw.idx; +} + +/* + * Add an event filter to a given event. + */ +static int armv8pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) + return -EPERM; + + /* + * If we're running in hyp mode, then we *are* the hypervisor. + * Therefore we ignore exclude_hv in this configuration, since + * there's no hypervisor to sample anyway. This is consistent + * with other architectures (x86 and Power). + */ + if (is_kernel_in_hyp_mode()) { + if (!attr->exclude_kernel && !attr->exclude_host) + config_base |= ARMV8_PMU_INCLUDE_EL2; + if (attr->exclude_guest) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + if (attr->exclude_host) + config_base |= ARMV8_PMU_EXCLUDE_EL0; + } else { + if (!attr->exclude_hv && !attr->exclude_host) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } + + /* + * Filter out !VHE kernels and guest kernels + */ + if (attr->exclude_kernel) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + + if (attr->exclude_user) + config_base |= ARMV8_PMU_EXCLUDE_EL0; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv8pmu_reset(void *info) +{ + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + u32 pmcr; + + /* The counter and interrupt enable registers are unknown at reset. */ + armv8pmu_disable_counter(U32_MAX); + armv8pmu_disable_intens(U32_MAX); + + /* Clear the counters we flip at guest entry/exit */ + kvm_clr_pmu_events(U32_MAX); + + /* + * Initialize & Reset PMNC. Request overflow interrupt for + * 64 bit cycle counter but cheat in armv8pmu_write_counter(). + */ + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + + /* Enable long event counter support where available */ + if (armv8pmu_has_long_event(cpu_pmu)) + pmcr |= ARMV8_PMU_PMCR_LP; + + armv8pmu_pmcr_write(pmcr); +} + +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) +{ + int hw_event_id; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + + /* + * CHAIN events only work when paired with an adjacent counter, and it + * never makes sense for a user to open one in isolation, as they'll be + * rotated arbitrarily. + */ + if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN) + return -EINVAL; + + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + + /* + * User events must be allocated into a single counter, and so + * must not be chained. + * + * Most 64-bit events require long counter support, but 64-bit + * CPU_CYCLES events can be placed into the dedicated cycle + * counter when this is free. + */ + if (armv8pmu_event_want_user_access(event)) { + if (!(event->attach_state & PERF_ATTACH_TASK)) + return -EINVAL; + if (armv8pmu_event_is_64bit(event) && + (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !armv8pmu_has_long_event(armpmu)) + return -EOPNOTSUPP; + + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; + } + + /* Only expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; + } + + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); +} + +static int armv8_a53_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); +} + +static int armv8_a57_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); +} + +static int armv8_a73_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); +} + +static int armv8_thunder_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, + &armv8_thunder_perf_cache_map); +} + +static int armv8_vulcan_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, + &armv8_vulcan_perf_cache_map); +} + +struct armv8pmu_probe_info { + struct arm_pmu *pmu; + bool present; +}; + +static void __armv8pmu_probe_pmu(void *info) +{ + struct armv8pmu_probe_info *probe = info; + struct arm_pmu *cpu_pmu = probe->pmu; + u64 dfr0; + u64 pmceid_raw[2]; + u32 pmceid[2]; + int pmuver; + + dfr0 = read_sysreg(id_aa64dfr0_el1); + pmuver = cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI) + return; + + cpu_pmu->pmuver = pmuver; + probe->present = true; + + /* Read the nb of CNTx counters supported from PMNC */ + cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) + & ARMV8_PMU_PMCR_N_MASK; + + /* Add the CPU cycles counter */ + cpu_pmu->num_events += 1; + + pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); + pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); + + bitmap_from_arr32(cpu_pmu->pmceid_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + pmceid[0] = pmceid_raw[0] >> 32; + pmceid[1] = pmceid_raw[1] >> 32; + + bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; +} + +static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) +{ + struct armv8pmu_probe_info probe = { + .pmu = cpu_pmu, + .present = false, + }; + int ret; + + ret = smp_call_function_any(&cpu_pmu->supported_cpus, + __armv8pmu_probe_pmu, + &probe, 1); + if (ret) + return ret; + + return probe.present ? 0 : -ENODEV; +} + +static void armv8pmu_disable_user_access_ipi(void *unused) +{ + armv8pmu_disable_user_access(); +} + +static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write || sysctl_perf_user_access) + return ret; + + on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1); + return 0; +} + +static struct ctl_table armv8_pmu_sysctl_table[] = { + { + .procname = "perf_user_access", + .data = &sysctl_perf_user_access, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = armv8pmu_proc_user_access_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static void armv8_pmu_register_sysctl_table(void) +{ + static u32 tbl_registered = 0; + + if (!cmpxchg_relaxed(&tbl_registered, 0, 1)) + register_sysctl("kernel", armv8_pmu_sysctl_table); +} + +static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event), + const struct attribute_group *events, + const struct attribute_group *format, + const struct attribute_group *caps) +{ + int ret = armv8pmu_probe_pmu(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->handle_irq = armv8pmu_handle_irq; + cpu_pmu->enable = armv8pmu_enable_event; + cpu_pmu->disable = armv8pmu_disable_event; + cpu_pmu->read_counter = armv8pmu_read_counter; + cpu_pmu->write_counter = armv8pmu_write_counter; + cpu_pmu->get_event_idx = armv8pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx; + cpu_pmu->start = armv8pmu_start; + cpu_pmu->stop = armv8pmu_stop; + cpu_pmu->reset = armv8pmu_reset; + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + + cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + + cpu_pmu->name = name; + cpu_pmu->map_event = map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? + events : &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? + format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; + + armv8_pmu_register_sysctl_table(); + return 0; +} + +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + +#define PMUV3_INIT_SIMPLE(name) \ +static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ +{ \ + return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ +} + +PMUV3_INIT_SIMPLE(armv8_pmuv3) + +PMUV3_INIT_SIMPLE(armv8_cortex_a34) +PMUV3_INIT_SIMPLE(armv8_cortex_a55) +PMUV3_INIT_SIMPLE(armv8_cortex_a65) +PMUV3_INIT_SIMPLE(armv8_cortex_a75) +PMUV3_INIT_SIMPLE(armv8_cortex_a76) +PMUV3_INIT_SIMPLE(armv8_cortex_a77) +PMUV3_INIT_SIMPLE(armv8_cortex_a78) +PMUV3_INIT_SIMPLE(armv9_cortex_a510) +PMUV3_INIT_SIMPLE(armv9_cortex_a710) +PMUV3_INIT_SIMPLE(armv8_cortex_x1) +PMUV3_INIT_SIMPLE(armv9_cortex_x2) +PMUV3_INIT_SIMPLE(armv8_neoverse_e1) +PMUV3_INIT_SIMPLE(armv8_neoverse_n1) +PMUV3_INIT_SIMPLE(armv9_neoverse_n2) +PMUV3_INIT_SIMPLE(armv8_neoverse_v1) + +PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) +PMUV3_INIT_SIMPLE(armv8_nvidia_denver) + +static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); +} + +static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); +} + +static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); +} + +static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); +} + +static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event); +} + +static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); +} + +static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); +} + +static const struct of_device_id armv8_pmu_of_device_ids[] = { + {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, + {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, + {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, + {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init}, + {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init}, + {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, + {.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init}, + {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, + {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, + {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, + {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, + {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, + {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, + {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, + {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, + {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, + {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, + {.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init}, + {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, + {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, + {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, + {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init}, + {}, +}; + +static int armv8_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); +} + +static struct platform_driver armv8_pmu_driver = { + .driver = { + .name = ARMV8_PMU_PDEV_NAME, + .of_match_table = armv8_pmu_of_device_ids, + .suppress_bind_attrs = true, + }, + .probe = armv8_pmu_device_probe, +}; + +static int __init armv8_pmu_driver_init(void) +{ + if (acpi_disabled) + return platform_driver_register(&armv8_pmu_driver); + else + return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); +} +device_initcall(armv8_pmu_driver_init) + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + struct clock_read_data *rd; + unsigned int seq; + u64 ns; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; + userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event); + + if (userpg->cap_user_rdpmc) { + if (event->hw.flags & ARMPMU_EVT_64BIT) + userpg->pmc_width = 64; + else + userpg->pmc_width = 32; + } + + do { + rd = sched_clock_read_begin(&seq); + + if (rd->read_sched_clock != arch_timer_read_counter) + return; + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; + + /* + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. + */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; + + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (userpg->time_shift == 32) { + userpg->time_shift = 31; + userpg->time_mult >>= 1; + } + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; +} -- cgit v1.2.3 From df29ddf4f04b00cf60669686dc7f534c3ed7c433 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:21 -0400 Subject: arm64: perf: Abstract system register accesses away As we want to enable 32bit support, we need to distanciate the PMUv3 driver from the AArch64 system register names. This patch moves all system register accesses to an architecture specific include file, allowing the 32bit counterpart to be slotted in at a later time. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-3-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 137 +++++++++++++++++++++++++++++++++++++ drivers/perf/arm_pmuv3.c | 115 +++++++------------------------ include/linux/perf/arm_pmuv3.h | 45 ++++++++++++ 3 files changed, 205 insertions(+), 92 deletions(-) create mode 100644 arch/arm64/include/asm/arm_pmuv3.h (limited to 'drivers/perf') diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..c444cbfb3acd --- /dev/null +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_cpuid(PMMIR_EL1); +} + +static inline u32 read_pmuver(void) +{ + u64 dfr0 = read_sysreg(id_aa64dfr0_el1); + + return cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, pmcr_el0); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, pmselr_el0); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, pmccntr_el0); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, pmxevcntr_el0); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(pmxevcntr_el0); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, pmxevtyper_el0); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, pmcntenset_el0); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, pmcntenclr_el0); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, pmintenset_el1); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, pmintenclr_el1); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, pmccfiltr_el0); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, pmovsclr_el0); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(pmovsclr_el0); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, pmuserenr_el0); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(pmceid0_el0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(pmceid1_el0); +} + +#endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c25203f8b940..f783f068d612 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -25,6 +24,8 @@ #include #include +#include + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -425,83 +426,16 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -/* - * This code is really good - */ - -#define PMEVN_CASE(n, case_macro) \ - case n: case_macro(n); break - -#define PMEVN_SWITCH(x, case_macro) \ - do { \ - switch (x) { \ - PMEVN_CASE(0, case_macro); \ - PMEVN_CASE(1, case_macro); \ - PMEVN_CASE(2, case_macro); \ - PMEVN_CASE(3, case_macro); \ - PMEVN_CASE(4, case_macro); \ - PMEVN_CASE(5, case_macro); \ - PMEVN_CASE(6, case_macro); \ - PMEVN_CASE(7, case_macro); \ - PMEVN_CASE(8, case_macro); \ - PMEVN_CASE(9, case_macro); \ - PMEVN_CASE(10, case_macro); \ - PMEVN_CASE(11, case_macro); \ - PMEVN_CASE(12, case_macro); \ - PMEVN_CASE(13, case_macro); \ - PMEVN_CASE(14, case_macro); \ - PMEVN_CASE(15, case_macro); \ - PMEVN_CASE(16, case_macro); \ - PMEVN_CASE(17, case_macro); \ - PMEVN_CASE(18, case_macro); \ - PMEVN_CASE(19, case_macro); \ - PMEVN_CASE(20, case_macro); \ - PMEVN_CASE(21, case_macro); \ - PMEVN_CASE(22, case_macro); \ - PMEVN_CASE(23, case_macro); \ - PMEVN_CASE(24, case_macro); \ - PMEVN_CASE(25, case_macro); \ - PMEVN_CASE(26, case_macro); \ - PMEVN_CASE(27, case_macro); \ - PMEVN_CASE(28, case_macro); \ - PMEVN_CASE(29, case_macro); \ - PMEVN_CASE(30, case_macro); \ - default: WARN(1, "Invalid PMEV* index\n"); \ - } \ - } while (0) - -#define RETURN_READ_PMEVCNTRN(n) \ - return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) -{ - PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); - return 0; -} - -#define WRITE_PMEVCNTRN(n) \ - write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVCNTRN); -} - -#define WRITE_PMEVTYPERN(n) \ - write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVTYPERN); -} - static inline u32 armv8pmu_pmcr_read(void) { - return read_sysreg(pmcr_el0); + return read_pmcr(); } static inline void armv8pmu_pmcr_write(u32 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); - write_sysreg(val, pmcr_el0); + write_pmcr(val); } static inline int armv8pmu_has_overflowed(u32 pmovsr) @@ -576,7 +510,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) - value = read_sysreg(pmccntr_el0); + value = read_pmccntr(); else value = armv8pmu_read_hw_counter(event); @@ -611,7 +545,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) value = armv8pmu_bias_long_counter(event, value); if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(value, pmccntr_el0); + write_pmccntr(value); else armv8pmu_write_hw_counter(event, value); } @@ -642,7 +576,7 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx, chain_evt); } else { if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(hwc->config_base, pmccfiltr_el0); + write_pmccfiltr(hwc->config_base); else armv8pmu_write_evtype(idx, hwc->config_base); } @@ -665,7 +599,7 @@ static inline void armv8pmu_enable_counter(u32 mask) * enable the counter. * */ isb(); - write_sysreg(mask, pmcntenset_el0); + write_pmcntenset(mask); } static inline void armv8pmu_enable_event_counter(struct perf_event *event) @@ -682,7 +616,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { - write_sysreg(mask, pmcntenclr_el0); + write_pmcntenclr(mask); /* * Make sure the effects of disabling the counter are visible before we * start configuring the event. @@ -704,7 +638,7 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event) static inline void armv8pmu_enable_intens(u32 mask) { - write_sysreg(mask, pmintenset_el1); + write_pmintenset(mask); } static inline void armv8pmu_enable_event_irq(struct perf_event *event) @@ -715,10 +649,10 @@ static inline void armv8pmu_enable_event_irq(struct perf_event *event) static inline void armv8pmu_disable_intens(u32 mask) { - write_sysreg(mask, pmintenclr_el1); + write_pmintenclr(mask); isb(); /* Clear the overflow flag in case an interrupt is pending. */ - write_sysreg(mask, pmovsclr_el0); + write_pmovsclr(mask); isb(); } @@ -733,18 +667,18 @@ static inline u32 armv8pmu_getreset_flags(void) u32 value; /* Read */ - value = read_sysreg(pmovsclr_el0); + value = read_pmovsclr(); /* Write to clear flags */ value &= ARMV8_PMU_OVSR_MASK; - write_sysreg(value, pmovsclr_el0); + write_pmovsclr(value); return value; } static void armv8pmu_disable_user_access(void) { - write_sysreg(0, pmuserenr_el0); + write_pmuserenr(0); } static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) @@ -755,13 +689,13 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) /* Clear any unused counters to avoid leaking their contents */ for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { if (i == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(0, pmccntr_el0); + write_pmccntr(0); else armv8pmu_write_evcntr(i, 0); } - write_sysreg(0, pmuserenr_el0); - write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); + write_pmuserenr(0); + write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); } static void armv8pmu_enable_event(struct perf_event *event) @@ -1145,14 +1079,11 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu; - u64 dfr0; u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver; - dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_EL1_PMUVer_SHIFT); + pmuver = read_pmuver(); if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == ID_AA64DFR0_EL1_PMUVer_NI) return; @@ -1167,8 +1098,8 @@ static void __armv8pmu_probe_pmu(void *info) /* Add the CPU cycles counter */ cpu_pmu->num_events += 1; - pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); - pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); + pmceid[0] = pmceid_raw[0] = read_pmceid0(); + pmceid[1] = pmceid_raw[1] = read_pmceid1(); bitmap_from_arr32(cpu_pmu->pmceid_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); @@ -1179,9 +1110,9 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); - /* store PMMIR_EL1 register for sysfs */ + /* store PMMIR register for sysfs */ if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) - cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; } diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 53173abfc022..e3899bd77f5c 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -255,4 +255,49 @@ #define ARMV8_PMU_BUS_WIDTH_SHIFT 16 #define ARMV8_PMU_BUS_WIDTH_MASK 0xf +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + #endif -- cgit v1.2.3 From 711432770f78e5a9ae235a1a55decfb71993c958 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:22 -0400 Subject: perf: pmuv3: Abstract PMU version checks The current PMU version definitions are available for arm64 only, As we want to add PMUv3 support to arm (32-bit), abstracts these definitions by using arch-specific helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-4-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 16 ++++++++++++++++ drivers/perf/arm_pmuv3.c | 7 +++---- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index c444cbfb3acd..80cdfa4c3e88 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -134,4 +134,20 @@ static inline u32 read_pmceid1(void) return read_sysreg(pmceid1_el0); } +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; +} + #endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f783f068d612..f7d890af8cc1 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -392,7 +392,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); + return (is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1084,8 +1084,7 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; pmuver = read_pmuver(); - if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || - pmuver == ID_AA64DFR0_EL1_PMUVer_NI) + if (!pmuv3_implemented(pmuver)) return; cpu_pmu->pmuver = pmuver; @@ -1111,7 +1110,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR register for sysfs */ - if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) + if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; -- cgit v1.2.3 From 11fba29a8a1f2fca08f301426b15c62d8a0b8040 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:23 -0400 Subject: perf: pmuv3: Move inclusion of kvm_host.h to the arch-specific helper KVM host support is available only on arm64. By moving the inclusion of kvm_host.h to an arm64-specific file, the 32bit architecture will be able to implement dummy helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-5-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 2 ++ drivers/perf/arm_pmuv3.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 80cdfa4c3e88..d6b51deb7bf0 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -6,6 +6,8 @@ #ifndef __ASM_PMUV3_H #define __ASM_PMUV3_H +#include + #include #include diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f7d890af8cc1..2cab600af4fd 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From b3a070869f39be4fad9ea4d99f2c8fab1fb7eead Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:24 -0400 Subject: perf: pmuv3: Change GENMASK to GENMASK_ULL GENMASK macro uses "unsigned long" (32-bit wide on arm and 64-bit on arm64), This causes build issues when enabling PMUv3 on arm as it tries to access bits > 31. This patch switches the GENMASK to GENMASK_ULL, which uses "unsigned long long" (64-bit on both arm and arm64). Signed-off-by: Zaid Al-Bassam Acked-by: Marc Zyngier Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-6-zalbassam@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 2cab600af4fd..fc8ed3cd0330 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -489,7 +489,7 @@ static bool armv8pmu_event_needs_bias(struct perf_event *event) static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value |= GENMASK(63, 32); + value |= GENMASK_ULL(63, 32); return value; } @@ -497,7 +497,7 @@ static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value &= ~GENMASK(63, 32); + value &= ~GENMASK_ULL(63, 32); return value; } -- cgit v1.2.3 From 009d6dc87a568db62290b8dc0a517b612217b6da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:26 -0400 Subject: ARM: perf: Allow the use of the PMUv3 driver on 32bit ARM The only thing stopping the PMUv3 driver from compiling on 32bit is the lack of defined system registers names and the handful of required helpers. This is easily solved by providing the sysreg accessors and updating the Kconfig entry. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-8-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/include/asm/arm_pmuv3.h | 247 +++++++++++++++++++++++++++++++++++++++ drivers/perf/Kconfig | 5 +- drivers/perf/arm_pmuv3.c | 5 +- 3 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/arm_pmuv3.h (limited to 'drivers/perf') diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..78d3d4b82c6c --- /dev/null +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define PMCCNTR __ACCESS_CP15_64(0, c9) + +#define PMCR __ACCESS_CP15(c9, 0, c12, 0) +#define PMCNTENSET __ACCESS_CP15(c9, 0, c12, 1) +#define PMCNTENCLR __ACCESS_CP15(c9, 0, c12, 2) +#define PMOVSR __ACCESS_CP15(c9, 0, c12, 3) +#define PMSELR __ACCESS_CP15(c9, 0, c12, 5) +#define PMCEID0 __ACCESS_CP15(c9, 0, c12, 6) +#define PMCEID1 __ACCESS_CP15(c9, 0, c12, 7) +#define PMXEVTYPER __ACCESS_CP15(c9, 0, c13, 1) +#define PMXEVCNTR __ACCESS_CP15(c9, 0, c13, 2) +#define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) +#define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) +#define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMMIR __ACCESS_CP15(c9, 0, c14, 6) +#define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) + +#define PMEVCNTR0 __ACCESS_CP15(c14, 0, c8, 0) +#define PMEVCNTR1 __ACCESS_CP15(c14, 0, c8, 1) +#define PMEVCNTR2 __ACCESS_CP15(c14, 0, c8, 2) +#define PMEVCNTR3 __ACCESS_CP15(c14, 0, c8, 3) +#define PMEVCNTR4 __ACCESS_CP15(c14, 0, c8, 4) +#define PMEVCNTR5 __ACCESS_CP15(c14, 0, c8, 5) +#define PMEVCNTR6 __ACCESS_CP15(c14, 0, c8, 6) +#define PMEVCNTR7 __ACCESS_CP15(c14, 0, c8, 7) +#define PMEVCNTR8 __ACCESS_CP15(c14, 0, c9, 0) +#define PMEVCNTR9 __ACCESS_CP15(c14, 0, c9, 1) +#define PMEVCNTR10 __ACCESS_CP15(c14, 0, c9, 2) +#define PMEVCNTR11 __ACCESS_CP15(c14, 0, c9, 3) +#define PMEVCNTR12 __ACCESS_CP15(c14, 0, c9, 4) +#define PMEVCNTR13 __ACCESS_CP15(c14, 0, c9, 5) +#define PMEVCNTR14 __ACCESS_CP15(c14, 0, c9, 6) +#define PMEVCNTR15 __ACCESS_CP15(c14, 0, c9, 7) +#define PMEVCNTR16 __ACCESS_CP15(c14, 0, c10, 0) +#define PMEVCNTR17 __ACCESS_CP15(c14, 0, c10, 1) +#define PMEVCNTR18 __ACCESS_CP15(c14, 0, c10, 2) +#define PMEVCNTR19 __ACCESS_CP15(c14, 0, c10, 3) +#define PMEVCNTR20 __ACCESS_CP15(c14, 0, c10, 4) +#define PMEVCNTR21 __ACCESS_CP15(c14, 0, c10, 5) +#define PMEVCNTR22 __ACCESS_CP15(c14, 0, c10, 6) +#define PMEVCNTR23 __ACCESS_CP15(c14, 0, c10, 7) +#define PMEVCNTR24 __ACCESS_CP15(c14, 0, c11, 0) +#define PMEVCNTR25 __ACCESS_CP15(c14, 0, c11, 1) +#define PMEVCNTR26 __ACCESS_CP15(c14, 0, c11, 2) +#define PMEVCNTR27 __ACCESS_CP15(c14, 0, c11, 3) +#define PMEVCNTR28 __ACCESS_CP15(c14, 0, c11, 4) +#define PMEVCNTR29 __ACCESS_CP15(c14, 0, c11, 5) +#define PMEVCNTR30 __ACCESS_CP15(c14, 0, c11, 6) + +#define PMEVTYPER0 __ACCESS_CP15(c14, 0, c12, 0) +#define PMEVTYPER1 __ACCESS_CP15(c14, 0, c12, 1) +#define PMEVTYPER2 __ACCESS_CP15(c14, 0, c12, 2) +#define PMEVTYPER3 __ACCESS_CP15(c14, 0, c12, 3) +#define PMEVTYPER4 __ACCESS_CP15(c14, 0, c12, 4) +#define PMEVTYPER5 __ACCESS_CP15(c14, 0, c12, 5) +#define PMEVTYPER6 __ACCESS_CP15(c14, 0, c12, 6) +#define PMEVTYPER7 __ACCESS_CP15(c14, 0, c12, 7) +#define PMEVTYPER8 __ACCESS_CP15(c14, 0, c13, 0) +#define PMEVTYPER9 __ACCESS_CP15(c14, 0, c13, 1) +#define PMEVTYPER10 __ACCESS_CP15(c14, 0, c13, 2) +#define PMEVTYPER11 __ACCESS_CP15(c14, 0, c13, 3) +#define PMEVTYPER12 __ACCESS_CP15(c14, 0, c13, 4) +#define PMEVTYPER13 __ACCESS_CP15(c14, 0, c13, 5) +#define PMEVTYPER14 __ACCESS_CP15(c14, 0, c13, 6) +#define PMEVTYPER15 __ACCESS_CP15(c14, 0, c13, 7) +#define PMEVTYPER16 __ACCESS_CP15(c14, 0, c14, 0) +#define PMEVTYPER17 __ACCESS_CP15(c14, 0, c14, 1) +#define PMEVTYPER18 __ACCESS_CP15(c14, 0, c14, 2) +#define PMEVTYPER19 __ACCESS_CP15(c14, 0, c14, 3) +#define PMEVTYPER20 __ACCESS_CP15(c14, 0, c14, 4) +#define PMEVTYPER21 __ACCESS_CP15(c14, 0, c14, 5) +#define PMEVTYPER22 __ACCESS_CP15(c14, 0, c14, 6) +#define PMEVTYPER23 __ACCESS_CP15(c14, 0, c14, 7) +#define PMEVTYPER24 __ACCESS_CP15(c14, 0, c15, 0) +#define PMEVTYPER25 __ACCESS_CP15(c14, 0, c15, 1) +#define PMEVTYPER26 __ACCESS_CP15(c14, 0, c15, 2) +#define PMEVTYPER27 __ACCESS_CP15(c14, 0, c15, 3) +#define PMEVTYPER28 __ACCESS_CP15(c14, 0, c15, 4) +#define PMEVTYPER29 __ACCESS_CP15(c14, 0, c15, 5) +#define PMEVTYPER30 __ACCESS_CP15(c14, 0, c15, 6) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(PMEVCNTR##n) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, PMEVCNTR##n) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, PMEVTYPER##n) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_sysreg(PMMIR); +} + +static inline u32 read_pmuver(void) +{ + /* PMUVers is not a signed field */ + u32 dfr0 = read_cpuid_ext(CPUID_EXT_DFR0); + + return (dfr0 >> 24) & 0xf; +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, PMCR); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(PMCR); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, PMSELR); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, PMCCNTR); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(PMCCNTR); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, PMXEVCNTR); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(PMXEVCNTR); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, PMXEVTYPER); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, PMCNTENSET); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, PMCNTENCLR); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, PMINTENSET); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, PMINTENCLR); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, PMCCFILTR); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, PMOVSR); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(PMOVSR); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, PMUSERENR); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(PMCEID0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(PMCEID1); +} + +static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) +{ + return false; +} + +/* PMU Version in DFR Register */ +#define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P4 0x5 +#define ARMV8_PMU_DFR_VER_V3P5 0x6 +#define ARMV8_PMU_DFR_VER_IMP_DEF 0xF + +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ARMV8_PMU_DFR_VER_IMP_DEF || + pmuver == ARMV8_PMU_DFR_VER_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P5; +} + +#endif diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index defe6b47854b..711f82400086 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -101,15 +101,14 @@ config ARM_SMMU_V3_PMU based on the Stream ID of the corresponding master. config ARM_PMUV3 - depends on HW_PERF_EVENTS && ARM64 + depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64) bool "ARM PMUv3 support" if !ARM64 - default y + default ARM64 help Say y if you want to use the ARM performance monitor unit (PMU) version 3. The PMUv3 is the CPU performance monitors on ARMv8 (aarch32 and aarch64) systems that implement the PMUv3 architecture. - Currently, PMUv3 is only supported on aarch64 (arm64) config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index fc8ed3cd0330..34ed0d5d7898 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -388,10 +388,13 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { * We unconditionally enable ARMv8.5-PMU long event counter support * (64-bit events) where supported. Indicate if this arm_pmu has long * event counter support. + * + * On AArch32, long counters make no sense (you can't access the top + * bits), so we only enable this on AArch64. */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (is_pmuv3p5(cpu_pmu->pmuver)); + return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) -- cgit v1.2.3 From f87e9114b5e590c2c6658ca21d7b714ca240bdd0 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 1 Mar 2023 09:55:40 -0800 Subject: perf/arm-cmn: Move overlapping wp_combine field As eventid field was expanded to support new mesh versions, it started to overlap with wp_combine field. Move wp_combine to fix the issue. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230301175540.19891-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index c9689861be3f..9f2edc28d16e 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -166,7 +166,7 @@ #define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) -#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) +#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) /* Note that we don't yet support the tertiary match group on newer IPs */ -- cgit v1.2.3 From d7f4679dc8d1a5cec497dd9f1b315ce77ab8db28 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:30:17 +0800 Subject: perf: arm: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023017.35789-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 54aa4658fb36..5de06f9a4dd3 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -655,8 +655,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) .attr_groups = dmc620_pmu_attr_groups, }; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dmc620_pmu->base = devm_ioremap_resource(&pdev->dev, res); + dmc620_pmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(dmc620_pmu->base)) return PTR_ERR(dmc620_pmu->base); -- cgit v1.2.3 From 8540504c5136146fd5c867afd86bb4618c1ee61a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:31:08 +0800 Subject: perf: qcom: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023108.36953-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l3_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 346311a05460..2887edb4eb0b 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -763,8 +763,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; - memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); - l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc); + l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc); if (IS_ERR(l3pmu->regs)) return PTR_ERR(l3pmu->regs); -- cgit v1.2.3 From a64021d3726a096442d63a958ff0c49c12c3a1f4 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 17 Feb 2023 14:10:43 +0000 Subject: kbuild, drivers/perf: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230217141059.392471-9-nick.alcock@oracle.com Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 979a7c2b4f56..7123beeb992f 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -581,4 +581,3 @@ static struct platform_driver m1_pmu_driver = { }; module_platform_driver(m1_pmu_driver); -MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From bc12f344d5de246d0e2bd1ffcd5f950314e460d9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 16 Feb 2023 14:34:03 +0800 Subject: drivers/perf: Use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/20230216063403.9753-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index a7689fecb49d..5c5be9fc1b15 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -656,8 +656,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) drw_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, drw_pmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res); + drw_pmu->cfg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(drw_pmu->cfg_base)) return PTR_ERR(drw_pmu->cfg_base); -- cgit v1.2.3 From c61e5720f23273269cc67ffb2908cf9831c8ca9d Mon Sep 17 00:00:00 2001 From: Jiucheng Xu Date: Thu, 9 Feb 2023 19:54:01 +0800 Subject: perf/amlogic: Fix config1/config2 parsing issue The 3th argument of for_each_set_bit is incorrect, fix them. Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver") Signed-off-by: Jiucheng Xu Link: https://lore.kernel.org/r/20230209115403.521868-1-jiucheng.xu@amlogic.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index b84346dbac2c..0b24dee1ed3c 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -156,10 +156,14 @@ static int meson_ddr_perf_event_add(struct perf_event *event, int flags) u64 config2 = event->attr.config2; int i; - for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1)) + for_each_set_bit(i, + (const unsigned long *)&config1, + BITS_PER_TYPE(config1)) meson_ddr_set_axi_filter(event, i); - for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2)) + for_each_set_bit(i, + (const unsigned long *)&config2, + BITS_PER_TYPE(config2)) meson_ddr_set_axi_filter(event, i + 64); if (flags & PERF_EF_START) -- cgit v1.2.3 From 16e15834659e9f5c05b9f12da6e86d76165c60a3 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Thu, 2 Mar 2023 14:57:01 -0600 Subject: perf: arm_cspmu: Fix variable dereference warning Fix warning message from smatch tool: | smatch warnings: | drivers/perf/arm_cspmu/arm_cspmu.c:1075 arm_cspmu_find_cpu_container() | warn: variable dereferenced before check 'cpu_dev' (see line 1073) Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202302191227.kc0V8fM7-lkp@intel.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Besar Wicaksono Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230302205701.35323-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e31302ab7e37..a3f1c410b417 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1078,12 +1078,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) { u32 acpi_uid; - struct device *cpu_dev = get_cpu_device(cpu); - struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev); + struct device *cpu_dev; + struct acpi_device *acpi_dev; + cpu_dev = get_cpu_device(cpu); if (!cpu_dev) return -ENODEV; + acpi_dev = ACPI_COMPANION(cpu_dev); while (acpi_dev) { if (!strcmp(acpi_device_hid(acpi_dev), ACPI_PROCESSOR_CONTAINER_HID) && -- cgit v1.2.3 From 7d0bfb7c997753ef88f4c3a29f3409c8cb052ab1 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 14 Feb 2023 11:38:02 +0100 Subject: drivers/perf: apple_m1: Add Apple M2 support The PMU itself is compatible with the one found on M1. We still know next to nothing about the counters so keep using CPU uarch specific compatibles/PMU names. Signed-off-by: Janne Grunau Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230214-apple_m2_pmu-v1-2-9c9213ab9b63@jannau.net Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/perf') diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 7123beeb992f..8574c6e58c83 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -559,7 +559,21 @@ static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) return m1_pmu_init(cpu_pmu); } +static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_avalanche_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_blizzard_pmu"; + return m1_pmu_init(cpu_pmu); +} + static const struct of_device_id m1_pmu_of_device_ids[] = { + { .compatible = "apple,avalanche-pmu", .data = m2_pmu_avalanche_init, }, + { .compatible = "apple,blizzard-pmu", .data = m2_pmu_blizzard_init, }, { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, { }, -- cgit v1.2.3 From 23b2fd83948952ffee2e96ce6a982be9d8e96f9f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 3 Apr 2023 12:49:05 +0100 Subject: perf/arm-cmn: Validate cycles events fully DTC cycle count events don't have anything to validate or initialise in themselves, but we should not forget to still validate their whole group context. Otherwise, we may fail to correctly reject a contrived group containing an impossible number of cycles events. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/3124e8c276a1f513c1a415dc839ca4181b3c8bc8.1680522545.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 9f2edc28d16e..5a57c3f10d27 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1546,7 +1546,7 @@ static int arm_cmn_event_init(struct perf_event *event) type = CMN_EVENT_TYPE(event); /* DTC events (i.e. cycles) already have everything they need */ if (type == CMN_TYPE_DTC) - return 0; + return arm_cmn_validate_group(cmn, event); eventid = CMN_EVENT_EVENTID(event); /* For watchpoints we need the actual XP node here */ -- cgit v1.2.3 From a30b87e6bd7db112deb1e3014b23fb90111bc6c4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Apr 2023 11:38:09 +0200 Subject: arm64: pmuv3: dynamically map PERF_COUNT_HW_BRANCH_INSTRUCTIONS The mapping of perf_events generic hardware events to actual PMU events on ARM PMUv3 may not always be correct. This is in particular true for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event. Although the mapping points to an architected event, it may not always be available. This can be seen with a simple: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': branches 0.001401081 seconds time elapsed Yet the hardware does have an event that could be used for branches. Dynamically check for a supported hardware event which can be used for PERF_COUNT_HW_BRANCH_INSTRUCTIONS at mapping time. And with that: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': 166,739 branches 0.000832163 seconds time elapsed Co-developed-by: Stephane Eranian Signed-off-by: Stephane Eranian Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Co-developed-by: Peter Newman Signed-off-by: Peter Newman Link: https://lore.kernel.org/all/YvunKCJHSXKz%2FkZB@FVFF77S0Q05N Link: https://lore.kernel.org/r/20230411093809.657501-1-peternewman@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 34ed0d5d7898..c98e4039386d 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -46,7 +46,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, @@ -985,6 +984,28 @@ static void armv8pmu_reset(void *info) armv8pmu_pmcr_write(pmcr); } +static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, + struct perf_event *event) +{ + if (event->attr.type == PERF_TYPE_HARDWARE && + event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) { + + if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; + + if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_BR_RETIRED; + + return HW_OP_UNSUPPORTED; + } + + return armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + static int __armv8_pmuv3_map_event(struct perf_event *event, const unsigned (*extra_event_map) [PERF_COUNT_HW_MAX], @@ -996,9 +1017,7 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event); /* * CHAIN events only work when paired with an adjacent counter, and it -- cgit v1.2.3 From 2ad91e44e6b0c7ef1ed151b3bb2242a2144e6085 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Apr 2023 11:29:40 +0100 Subject: perf/arm-cmn: Fix port detection for CMN-700 When the "extra device ports" configuration was first added, the additional mxp_device_port_connect_info registers were added around the existing mxp_mesh_port_connect_info registers. What I missed about CMN-700 is that it shuffled them around to remove this discontinuity. As such, tweak the definitions and factor out a helper for reading these registers so we can deal with this discrepancy easily, which does at least allow nicely tidying up the callsites. With this we can then also do the nice thing and skip accesses completely rather than relying on RES0 behaviour where we know the extra registers aren't defined. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Reported-by: Jing Zhang Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/71d129241d4d7923cde72a0e5b4c8d2f6084525f.1681295193.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 57 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 5a57c3f10d27..47d359f72957 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -57,14 +57,12 @@ #define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0) /* XPs also have some local topology info which has uses too */ -#define CMN_MXP__CONNECT_INFO_P0 0x0008 -#define CMN_MXP__CONNECT_INFO_P1 0x0010 -#define CMN_MXP__CONNECT_INFO_P2 0x0028 -#define CMN_MXP__CONNECT_INFO_P3 0x0030 -#define CMN_MXP__CONNECT_INFO_P4 0x0038 -#define CMN_MXP__CONNECT_INFO_P5 0x0040 +#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p)) #define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0) +#define CMN_MAX_PORTS 6 +#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10 + /* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 @@ -396,6 +394,25 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, + const struct arm_cmn_node *xp, int port) +{ + int offset = CMN_MXP__CONNECT_INFO(port); + + if (port >= 2) { + if (cmn->model & (CMN600 | CMN650)) + return 0; + /* + * CI-700 may have extra ports, but still has the + * mesh_port_connect_info registers in the way. + */ + if (cmn->model == CI700) + offset += CI700_CONNECT_INFO_P2_5_OFFSET; + } + + return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset); +} + static struct dentry *arm_cmn_debugfs; #ifdef CONFIG_DEBUG_FS @@ -469,7 +486,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) y = cmn->mesh_y; while (y--) { int xp_base = cmn->mesh_x * y; - u8 port[6][CMN_MAX_DIMENSION]; + u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION]; for (x = 0; x < cmn->mesh_x; x++) seq_puts(s, "--------+"); @@ -477,14 +494,9 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_printf(s, "\n%d |", y); for (x = 0; x < cmn->mesh_x; x++) { struct arm_cmn_node *xp = cmn->xps + xp_base + x; - void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET; - - port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0); - port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1); - port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2); - port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3); - port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4); - port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5); + + for (p = 0; p < CMN_MAX_PORTS; p++) + port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); seq_printf(s, " XP #%-2d |", xp_base + x); } @@ -2083,18 +2095,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) * from this, since in that case we will see at least one XP * with port 2 connected, for the HN-D. */ - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0)) - xp_ports |= BIT(0); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1)) - xp_ports |= BIT(1); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2)) - xp_ports |= BIT(2); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3)) - xp_ports |= BIT(3); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4)) - xp_ports |= BIT(4); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5)) - xp_ports |= BIT(5); + for (int p = 0; p < CMN_MAX_PORTS; p++) + if (arm_cmn_device_connect_info(cmn, xp, p)) + xp_ports |= BIT(p); if (cmn->multi_dtm && (xp_ports & 0xc)) arm_cmn_init_dtm(dtm++, xp, 1); -- cgit v1.2.3 From 25d8c25025a46e7621edde2eb6d5f55c6d29ee86 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:22 +0800 Subject: drivers/perf: hisi: Remove redundant initialized of pmu->name "pmu->name" is initialized by perf_pmu_register() function, so remove the redundant initialized in hisi_pmu_init(). Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 +-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 8 files changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 4c67d57217a7..40f1bc9f9b91 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -316,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - hisi_pmu_init(cpa_pmu, name, THIS_MODULE); + hisi_pmu_init(cpa_pmu, THIS_MODULE); /* Power Management should be disabled before using CPA PMU. */ hisi_cpa_pmu_disable_pm(cpa_pmu); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8c3ffcbfd4c0..8a3d74ddcd6d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,7 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 806698b9eabf..5701a84edb0e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,7 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hisi_pmu_init(hha_pmu, name, THIS_MODULE); + hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 5b2c35f1658a..68596b566344 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,7 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - hisi_pmu_init(l3c_pmu, name, THIS_MODULE); + hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index afe3419f3f6d..71b6687d6696 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,7 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(pa_pmu, name, THIS_MODULE); + hisi_pmu_init(pa_pmu, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index f1b0f5e1a28f..2823f381930d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,12 +531,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module) +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; - pmu->name = name; pmu->module = module; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index f8e3cc6903d7..07890a8e96ca 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,6 +121,5 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module); +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 1e354433776a..6fe534a665ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(sllc_pmu, name, THIS_MODULE); + hisi_pmu_init(sllc_pmu, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { -- cgit v1.2.3 From 257aedb72e731082ab514058e57b132f0b29d707 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:23 +0800 Subject: drivers/perf: hisi: add NULL check for name When allocations fails that can be NULL now. If the name provided is NULL, then the initialization process of the PMU type and dev will be skipped in function perf_pmu_register(). Consequently, the PMU will not be able to register into the kernel. Moreover, in the case of unregister the PMU, the function device_del() will need to handle NULL pointers, which potentially can cause issues. So move this allocation above the cpuhp_state_add_instance() and directly return if it does fail. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 17 ++++++++++------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 7 +++++-- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++------ 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8a3d74ddcd6d..ffb039d05d07 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -499,13 +499,6 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); - if (ret) { - dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); - return ret; - } - if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u_%u", @@ -516,6 +509,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); + return ret; + } + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 5701a84edb0e..15caf99e1eef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -510,6 +510,11 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", + hha_pmu->sccl_id, hha_pmu->index_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); if (ret) { @@ -517,8 +522,6 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 68596b566344..794dbcd19b7a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -544,6 +544,11 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", + l3c_pmu->sccl_id, l3c_pmu->ccl_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); if (ret) { @@ -551,12 +556,6 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } - /* - * CCL_ID is used to identify the L3C in the same SCCL which was - * used _UID by mistake. - */ - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); -- cgit v1.2.3