diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-06 20:51:49 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-06 20:51:49 +0200 |
commit | 18fd049731e67651009f316195da9281b756f2cf (patch) | |
tree | 082c87950f33bc466672ea3fd2ea24e5b2a37253 /drivers/perf | |
parent | Merge tag 'arm-soc-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc (diff) | |
parent | arm64: alternatives: Use vdso/bits.h instead of linux/bits.h (diff) | |
download | linux-18fd049731e67651009f316195da9281b756f2cf.tar.xz linux-18fd049731e67651009f316195da9281b756f2cf.zip |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
- arm64 perf: DDR PMU driver for Alibaba's T-Head Yitian 710 SoC, SVE
vector granule register added to the user regs together with SVE perf
extensions documentation.
- SVE updates: add HWCAP for SVE EBF16, update the SVE ABI
documentation to match the actual kernel behaviour (zeroing the
registers on syscall rather than "zeroed or preserved" previously).
- More conversions to automatic system registers generation.
- vDSO: use self-synchronising virtual counter access in gettimeofday()
if the architecture supports it.
- arm64 stacktrace cleanups and improvements.
- arm64 atomics improvements: always inline assembly, remove LL/SC
trampolines.
- Improve the reporting of EL1 exceptions: rework BTI and FPAC
exception handling, better EL1 undefs reporting.
- Cortex-A510 erratum 2658417: remove BF16 support due to incorrect
result.
- arm64 defconfig updates: build CoreSight as a module, enable options
necessary for docker, memory hotplug/hotremove, enable all PMUs
provided by Arm.
- arm64 ptrace() support for TPIDR2_EL0 (register provided with the SME
extensions).
- arm64 ftraces updates/fixes: fix module PLTs with mcount, remove
unused function.
- kselftest updates for arm64: simple HWCAP validation, FP stress test
improvements, validation of ZA regs in signal handlers, include
larger SVE and SME vector lengths in signal tests, various cleanups.
- arm64 alternatives (code patching) improvements to robustness and
consistency: replace cpucap static branches with equivalent
alternatives, associate callback alternatives with a cpucap.
- Miscellaneous updates: optimise kprobe performance of patching
single-step slots, simplify uaccess_mask_ptr(), move MTE registers
initialisation to C, support huge vmalloc() mappings, run softirqs on
the per-CPU IRQ stack, compat (arm32) misalignment fixups for
multiword accesses.
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (126 commits)
arm64: alternatives: Use vdso/bits.h instead of linux/bits.h
arm64/kprobe: Optimize the performance of patching single-step slot
arm64: defconfig: Add Coresight as module
kselftest/arm64: Handle EINTR while reading data from children
kselftest/arm64: Flag fp-stress as exiting when we begin finishing up
kselftest/arm64: Don't repeat termination handler for fp-stress
ARM64: reloc_test: add __init/__exit annotations to module init/exit funcs
arm64/mm: fold check for KFENCE into can_set_direct_map()
arm64: ftrace: fix module PLTs with mcount
arm64: module: Remove unused plt_entry_is_initialized()
arm64: module: Make plt_equals_entry() static
arm64: fix the build with binutils 2.27
kselftest/arm64: Don't enable v8.5 for MTE selftest builds
arm64: uaccess: simplify uaccess_mask_ptr()
arm64: asm/perf_regs.h: Avoid C++-style comment in UAPI header
kselftest/arm64: Fix typo in hwcap check
arm64: mte: move register initialization to C
arm64: mm: handle ARM64_KERNEL_USES_PMD_MAPS in vmemmap_populate()
arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()
arm64/sve: Add Perf extensions documentation
...
Diffstat (limited to 'drivers/perf')
-rw-r--r-- | drivers/perf/Kconfig | 7 | ||||
-rw-r--r-- | drivers/perf/Makefile | 1 | ||||
-rw-r--r-- | drivers/perf/alibaba_uncore_drw_pmu.c | 810 | ||||
-rw-r--r-- | drivers/perf/arm_pmu.c | 2 | ||||
-rw-r--r-- | drivers/perf/arm_spe_pmu.c | 6 |
5 files changed, 822 insertions, 4 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 1e2d69453771..44c07ea487f4 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -183,6 +183,13 @@ config APPLE_M1_CPU_PMU Provides support for the non-architectural CPU PMUs present on the Apple M1 SoCs and derivatives. +config ALIBABA_UNCORE_DRW_PMU + tristate "Alibaba T-Head Yitian 710 DDR Sub-system Driveway PMU driver" + depends on ARM64 || COMPILE_TEST + help + Support for Driveway PMU events monitoring on Yitian 710 DDR + Sub-system. + source "drivers/perf/hisilicon/Kconfig" config MARVELL_CN10K_DDR_PMU diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 57a279c61df5..050d04ee19dd 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o +obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c new file mode 100644 index 000000000000..82729b874f09 --- /dev/null +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -0,0 +1,810 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Alibaba DDR Sub-System Driveway PMU driver + * + * Copyright (C) 2022 Alibaba Inc + */ + +#define ALI_DRW_PMUNAME "ali_drw" +#define ALI_DRW_DRVNAME ALI_DRW_PMUNAME "_pmu" +#define pr_fmt(fmt) ALI_DRW_DRVNAME ": " fmt + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/cpuhotplug.h> +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/printk.h> +#include <linux/rculist.h> +#include <linux/refcount.h> + + +#define ALI_DRW_PMU_COMMON_MAX_COUNTERS 16 +#define ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE 19 + +#define ALI_DRW_PMU_PA_SHIFT 12 +#define ALI_DRW_PMU_CNT_INIT 0x00000000 +#define ALI_DRW_CNT_MAX_PERIOD 0xffffffff +#define ALI_DRW_PMU_CYCLE_EVT_ID 0x80 + +#define ALI_DRW_PMU_CNT_CTRL 0xC00 +#define ALI_DRW_PMU_CNT_RST BIT(2) +#define ALI_DRW_PMU_CNT_STOP BIT(1) +#define ALI_DRW_PMU_CNT_START BIT(0) + +#define ALI_DRW_PMU_CNT_STATE 0xC04 +#define ALI_DRW_PMU_TEST_CTRL 0xC08 +#define ALI_DRW_PMU_CNT_PRELOAD 0xC0C + +#define ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK GENMASK(23, 0) +#define ALI_DRW_PMU_CYCLE_CNT_LOW_MASK GENMASK(31, 0) +#define ALI_DRW_PMU_CYCLE_CNT_HIGH 0xC10 +#define ALI_DRW_PMU_CYCLE_CNT_LOW 0xC14 + +/* PMU EVENT SEL 0-3 are paired in 32-bit registers on a 4-byte stride */ +#define ALI_DRW_PMU_EVENT_SEL0 0xC68 +/* counter 0-3 use sel0, counter 4-7 use sel1...*/ +#define ALI_DRW_PMU_EVENT_SELn(n) \ + (ALI_DRW_PMU_EVENT_SEL0 + (n / 4) * 0x4) +#define ALI_DRW_PMCOM_CNT_EN BIT(7) +#define ALI_DRW_PMCOM_CNT_EVENT_MASK GENMASK(5, 0) +#define ALI_DRW_PMCOM_CNT_EVENT_OFFSET(n) \ + (8 * (n % 4)) + +/* PMU COMMON COUNTER 0-15, are paired in 32-bit registers on a 4-byte stride */ +#define ALI_DRW_PMU_COMMON_COUNTER0 0xC78 +#define ALI_DRW_PMU_COMMON_COUNTERn(n) \ + (ALI_DRW_PMU_COMMON_COUNTER0 + 0x4 * (n)) + +#define ALI_DRW_PMU_OV_INTR_ENABLE_CTL 0xCB8 +#define ALI_DRW_PMU_OV_INTR_DISABLE_CTL 0xCBC +#define ALI_DRW_PMU_OV_INTR_ENABLE_STATUS 0xCC0 +#define ALI_DRW_PMU_OV_INTR_CLR 0xCC4 +#define ALI_DRW_PMU_OV_INTR_STATUS 0xCC8 +#define ALI_DRW_PMCOM_CNT_OV_INTR_MASK GENMASK(23, 8) +#define ALI_DRW_PMBW_CNT_OV_INTR_MASK GENMASK(7, 0) +#define ALI_DRW_PMU_OV_INTR_MASK GENMASK_ULL(63, 0) + +static int ali_drw_cpuhp_state_num; + +static LIST_HEAD(ali_drw_pmu_irqs); +static DEFINE_MUTEX(ali_drw_pmu_irqs_lock); + +struct ali_drw_pmu_irq { + struct hlist_node node; + struct list_head irqs_node; + struct list_head pmus_node; + int irq_num; + int cpu; + refcount_t refcount; +}; + +struct ali_drw_pmu { + void __iomem *cfg_base; + struct device *dev; + + struct list_head pmus_node; + struct ali_drw_pmu_irq *irq; + int irq_num; + int cpu; + DECLARE_BITMAP(used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS); + struct perf_event *events[ALI_DRW_PMU_COMMON_MAX_COUNTERS]; + int evtids[ALI_DRW_PMU_COMMON_MAX_COUNTERS]; + + struct pmu pmu; +}; + +#define to_ali_drw_pmu(p) (container_of(p, struct ali_drw_pmu, pmu)) + +#define DRW_CONFIG_EVENTID GENMASK(7, 0) +#define GET_DRW_EVENTID(event) FIELD_GET(DRW_CONFIG_EVENTID, (event)->attr.config) + +static ssize_t ali_drw_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sprintf(buf, "%s\n", (char *)eattr->var); +} + +/* + * PMU event attributes + */ +static ssize_t ali_drw_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); +} + +#define ALI_DRW_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define ALI_DRW_PMU_FORMAT_ATTR(_name, _config) \ + ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_format_show, (void *)_config) +#define ALI_DRW_PMU_EVENT_ATTR(_name, _config) \ + ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_event_show, (unsigned long)_config) + +static struct attribute *ali_drw_pmu_events_attrs[] = { + ALI_DRW_PMU_EVENT_ATTR(hif_rd_or_wr, 0x0), + ALI_DRW_PMU_EVENT_ATTR(hif_wr, 0x1), + ALI_DRW_PMU_EVENT_ATTR(hif_rd, 0x2), + ALI_DRW_PMU_EVENT_ATTR(hif_rmw, 0x3), + ALI_DRW_PMU_EVENT_ATTR(hif_hi_pri_rd, 0x4), + ALI_DRW_PMU_EVENT_ATTR(dfi_wr_data_cycles, 0x7), + ALI_DRW_PMU_EVENT_ATTR(dfi_rd_data_cycles, 0x8), + ALI_DRW_PMU_EVENT_ATTR(hpr_xact_when_critical, 0x9), + ALI_DRW_PMU_EVENT_ATTR(lpr_xact_when_critical, 0xA), + ALI_DRW_PMU_EVENT_ATTR(wr_xact_when_critical, 0xB), + ALI_DRW_PMU_EVENT_ATTR(op_is_activate, 0xC), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd_or_wr, 0xD), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd_activate, 0xE), + ALI_DRW_PMU_EVENT_ATTR(op_is_rd, 0xF), + ALI_DRW_PMU_EVENT_ATTR(op_is_wr, 0x10), + ALI_DRW_PMU_EVENT_ATTR(op_is_mwr, 0x11), + ALI_DRW_PMU_EVENT_ATTR(op_is_precharge, 0x12), + ALI_DRW_PMU_EVENT_ATTR(precharge_for_rdwr, 0x13), + ALI_DRW_PMU_EVENT_ATTR(precharge_for_other, 0x14), + ALI_DRW_PMU_EVENT_ATTR(rdwr_transitions, 0x15), + ALI_DRW_PMU_EVENT_ATTR(write_combine, 0x16), + ALI_DRW_PMU_EVENT_ATTR(war_hazard, 0x17), + ALI_DRW_PMU_EVENT_ATTR(raw_hazard, 0x18), + ALI_DRW_PMU_EVENT_ATTR(waw_hazard, 0x19), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk0, 0x1A), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk1, 0x1B), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk2, 0x1C), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk3, 0x1D), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk0, 0x1E), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk1, 0x1F), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk2, 0x20), + ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk3, 0x21), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk0, 0x26), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk1, 0x27), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk2, 0x28), + ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk3, 0x29), + ALI_DRW_PMU_EVENT_ATTR(op_is_refresh, 0x2A), + ALI_DRW_PMU_EVENT_ATTR(op_is_crit_ref, 0x2B), + ALI_DRW_PMU_EVENT_ATTR(op_is_load_mode, 0x2D), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqcl, 0x2E), + ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_rd, 0x30), + ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_wr, 0x31), + ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mpc, 0x34), + ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mrr, 0x35), + ALI_DRW_PMU_EVENT_ATTR(op_is_tcr_mrr, 0x36), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqstart, 0x37), + ALI_DRW_PMU_EVENT_ATTR(op_is_zqlatch, 0x38), + ALI_DRW_PMU_EVENT_ATTR(chi_txreq, 0x39), + ALI_DRW_PMU_EVENT_ATTR(chi_txdat, 0x3A), + ALI_DRW_PMU_EVENT_ATTR(chi_rxdat, 0x3B), + ALI_DRW_PMU_EVENT_ATTR(chi_rxrsp, 0x3C), + ALI_DRW_PMU_EVENT_ATTR(tsz_vio, 0x3D), + ALI_DRW_PMU_EVENT_ATTR(cycle, 0x80), + NULL, +}; + +static struct attribute_group ali_drw_pmu_events_attr_group = { + .name = "events", + .attrs = ali_drw_pmu_events_attrs, +}; + +static struct attribute *ali_drw_pmu_format_attr[] = { + ALI_DRW_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL, +}; + +static const struct attribute_group ali_drw_pmu_format_group = { + .name = "format", + .attrs = ali_drw_pmu_format_attr, +}; + +static ssize_t ali_drw_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(drw_pmu->cpu)); +} + +static struct device_attribute ali_drw_pmu_cpumask_attr = + __ATTR(cpumask, 0444, ali_drw_pmu_cpumask_show, NULL); + +static struct attribute *ali_drw_pmu_cpumask_attrs[] = { + &ali_drw_pmu_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group ali_drw_pmu_cpumask_attr_group = { + .attrs = ali_drw_pmu_cpumask_attrs, +}; + +static const struct attribute_group *ali_drw_pmu_attr_groups[] = { + &ali_drw_pmu_events_attr_group, + &ali_drw_pmu_cpumask_attr_group, + &ali_drw_pmu_format_group, + NULL, +}; + +/* find a counter for event, then in add func, hw.idx will equal to counter */ +static int ali_drw_get_counter_idx(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + int idx; + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) { + if (!test_and_set_bit(idx, drw_pmu->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EBUSY; +} + +static u64 ali_drw_pmu_read_counter(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + u64 cycle_high, cycle_low; + + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) { + cycle_high = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_HIGH); + cycle_high &= ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK; + cycle_low = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_LOW); + cycle_low &= ALI_DRW_PMU_CYCLE_CNT_LOW_MASK; + return (cycle_high << 32 | cycle_low); + } + + return readl(drw_pmu->cfg_base + + ALI_DRW_PMU_COMMON_COUNTERn(event->hw.idx)); +} + +static void ali_drw_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev, now; + + do { + prev = local64_read(&hwc->prev_count); + now = ali_drw_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + /* handle overflow. */ + delta = now - prev; + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) + delta &= ALI_DRW_PMU_OV_INTR_MASK; + else + delta &= ALI_DRW_CNT_MAX_PERIOD; + local64_add(delta, &event->count); +} + +static void ali_drw_pmu_event_set_period(struct perf_event *event) +{ + u64 pre_val; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + /* set a preload counter for test purpose */ + writel(ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE + event->hw.idx, + drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL); + + /* set conunter initial value */ + pre_val = ALI_DRW_PMU_CNT_INIT; + writel(pre_val, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD); + local64_set(&event->hw.prev_count, pre_val); + + /* set sel mode to zero to start test */ + writel(0x0, drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL); +} + +static void ali_drw_pmu_enable_counter(struct perf_event *event) +{ + u32 val, subval, reg, shift; + int counter = event->hw.idx; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + reg = ALI_DRW_PMU_EVENT_SELn(counter); + val = readl(drw_pmu->cfg_base + reg); + subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 1) | + FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, drw_pmu->evtids[counter]); + + shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter); + val &= ~(GENMASK(7, 0) << shift); + val |= subval << shift; + + writel(val, drw_pmu->cfg_base + reg); +} + +static void ali_drw_pmu_disable_counter(struct perf_event *event) +{ + u32 val, reg, subval, shift; + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + int counter = event->hw.idx; + + reg = ALI_DRW_PMU_EVENT_SELn(counter); + val = readl(drw_pmu->cfg_base + reg); + subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 0) | + FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, 0); + + shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter); + val &= ~(GENMASK(7, 0) << shift); + val |= subval << shift; + + writel(val, drw_pmu->cfg_base + reg); +} + +static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data) +{ + struct ali_drw_pmu_irq *irq = data; + struct ali_drw_pmu *drw_pmu; + irqreturn_t ret = IRQ_NONE; + + rcu_read_lock(); + list_for_each_entry_rcu(drw_pmu, &irq->pmus_node, pmus_node) { + unsigned long status, clr_status; + struct perf_event *event; + unsigned int idx; + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) { + event = drw_pmu->events[idx]; + if (!event) + continue; + ali_drw_pmu_disable_counter(event); + } + + /* common counter intr status */ + status = readl(drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_STATUS); + status = FIELD_GET(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status); + if (status) { + for_each_set_bit(idx, &status, + ALI_DRW_PMU_COMMON_MAX_COUNTERS) { + event = drw_pmu->events[idx]; + if (WARN_ON_ONCE(!event)) + continue; + ali_drw_pmu_event_update(event); + ali_drw_pmu_event_set_period(event); + } + + /* clear common counter intr status */ + clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1); + writel(clr_status, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); + } + + for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) { + event = drw_pmu->events[idx]; + if (!event) + continue; + if (!(event->hw.state & PERF_HES_STOPPED)) + ali_drw_pmu_enable_counter(event); + } + if (status) + ret = IRQ_HANDLED; + } + rcu_read_unlock(); + return ret; +} + +static struct ali_drw_pmu_irq *__ali_drw_pmu_init_irq(struct platform_device + *pdev, int irq_num) +{ + int ret; + struct ali_drw_pmu_irq *irq; + + list_for_each_entry(irq, &ali_drw_pmu_irqs, irqs_node) { + if (irq->irq_num == irq_num + && refcount_inc_not_zero(&irq->refcount)) + return irq; + } + + irq = kzalloc(sizeof(*irq), GFP_KERNEL); + if (!irq) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&irq->pmus_node); + + /* Pick one CPU to be the preferred one to use */ + irq->cpu = smp_processor_id(); + refcount_set(&irq->refcount, 1); + + /* + * FIXME: one of DDRSS Driveway PMU overflow interrupt shares the same + * irq number with MPAM ERR_IRQ. To register DDRSS PMU and MPAM drivers + * successfully, add IRQF_SHARED flag. Howerer, PMU interrupt should not + * share with other component. + */ + ret = devm_request_irq(&pdev->dev, irq_num, ali_drw_pmu_isr, + IRQF_SHARED, dev_name(&pdev->dev), irq); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request IRQ:%d ret:%d\n", irq_num, ret); + goto out_free; + } + + ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu)); + if (ret) + goto out_free; + + ret = cpuhp_state_add_instance_nocalls(ali_drw_cpuhp_state_num, + &irq->node); + if (ret) + goto out_free; + + irq->irq_num = irq_num; + list_add(&irq->irqs_node, &ali_drw_pmu_irqs); + + return irq; + +out_free: + kfree(irq); + return ERR_PTR(ret); +} + +static int ali_drw_pmu_init_irq(struct ali_drw_pmu *drw_pmu, + struct platform_device *pdev) +{ + int irq_num; + struct ali_drw_pmu_irq *irq; + + /* Read and init IRQ */ + irq_num = platform_get_irq(pdev, 0); + if (irq_num < 0) + return irq_num; + + mutex_lock(&ali_drw_pmu_irqs_lock); + irq = __ali_drw_pmu_init_irq(pdev, irq_num); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + if (IS_ERR(irq)) + return PTR_ERR(irq); + + drw_pmu->irq = irq; + + mutex_lock(&ali_drw_pmu_irqs_lock); + list_add_rcu(&drw_pmu->pmus_node, &irq->pmus_node); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + return 0; +} + +static void ali_drw_pmu_uninit_irq(struct ali_drw_pmu *drw_pmu) +{ + struct ali_drw_pmu_irq *irq = drw_pmu->irq; + + mutex_lock(&ali_drw_pmu_irqs_lock); + list_del_rcu(&drw_pmu->pmus_node); + + if (!refcount_dec_and_test(&irq->refcount)) { + mutex_unlock(&ali_drw_pmu_irqs_lock); + return; + } + + list_del(&irq->irqs_node); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL)); + cpuhp_state_remove_instance_nocalls(ali_drw_cpuhp_state_num, + &irq->node); + kfree(irq); +} + +static int ali_drw_pmu_event_init(struct perf_event *event) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + struct device *dev = drw_pmu->pmu.dev; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event)) { + dev_err(dev, "Sampling not supported!\n"); + return -EOPNOTSUPP; + } + + if (event->attach_state & PERF_ATTACH_TASK) { + dev_err(dev, "Per-task counter cannot allocate!\n"); + return -EOPNOTSUPP; + } + + event->cpu = drw_pmu->cpu; + if (event->cpu < 0) { + dev_err(dev, "Per-task mode not supported!\n"); + return -EOPNOTSUPP; + } + + if (event->group_leader != event && + !is_software_event(event->group_leader)) { + dev_err(dev, "driveway only allow one event!\n"); + return -EINVAL; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling != event && !is_software_event(sibling)) { + dev_err(dev, "driveway event not allowed!\n"); + return -EINVAL; + } + } + + /* reset all the pmu counters */ + writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + hwc->idx = -1; + + return 0; +} + +static void ali_drw_pmu_start(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + event->hw.state = 0; + + if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) { + writel(ALI_DRW_PMU_CNT_START, + drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + return; + } + + ali_drw_pmu_event_set_period(event); + if (flags & PERF_EF_RELOAD) { + unsigned long prev_raw_count = + local64_read(&event->hw.prev_count); + writel(prev_raw_count, + drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD); + } + + ali_drw_pmu_enable_counter(event); + + writel(ALI_DRW_PMU_CNT_START, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); +} + +static void ali_drw_pmu_stop(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + if (GET_DRW_EVENTID(event) != ALI_DRW_PMU_CYCLE_EVT_ID) + ali_drw_pmu_disable_counter(event); + + writel(ALI_DRW_PMU_CNT_STOP, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + ali_drw_pmu_event_update(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int ali_drw_pmu_add(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = -1; + int evtid; + + evtid = GET_DRW_EVENTID(event); + + if (evtid != ALI_DRW_PMU_CYCLE_EVT_ID) { + idx = ali_drw_get_counter_idx(event); + if (idx < 0) + return idx; + drw_pmu->events[idx] = event; + drw_pmu->evtids[idx] = evtid; + } + hwc->idx = idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + ali_drw_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void ali_drw_pmu_del(struct perf_event *event, int flags) +{ + struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + ali_drw_pmu_stop(event, PERF_EF_UPDATE); + + if (idx >= 0 && idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS) { + drw_pmu->events[idx] = NULL; + drw_pmu->evtids[idx] = 0; + clear_bit(idx, drw_pmu->used_mask); + } + + perf_event_update_userpage(event); +} + +static void ali_drw_pmu_read(struct perf_event *event) +{ + ali_drw_pmu_event_update(event); +} + +static int ali_drw_pmu_probe(struct platform_device *pdev) +{ + struct ali_drw_pmu *drw_pmu; + struct resource *res; + char *name; + int ret; + + drw_pmu = devm_kzalloc(&pdev->dev, sizeof(*drw_pmu), GFP_KERNEL); + if (!drw_pmu) + return -ENOMEM; + + drw_pmu->dev = &pdev->dev; + platform_set_drvdata(pdev, drw_pmu); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res); + if (!drw_pmu->cfg_base) + return -ENOMEM; + + name = devm_kasprintf(drw_pmu->dev, GFP_KERNEL, "ali_drw_%llx", + (u64) (res->start >> ALI_DRW_PMU_PA_SHIFT)); + if (!name) + return -ENOMEM; + + writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL); + + /* enable the generation of interrupt by all common counters */ + writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_ENABLE_CTL); + + /* clearing interrupt status */ + writel(0xffffff, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR); + + drw_pmu->cpu = smp_processor_id(); + + ret = ali_drw_pmu_init_irq(drw_pmu, pdev); + if (ret) + return ret; + + drw_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = ali_drw_pmu_event_init, + .add = ali_drw_pmu_add, + .del = ali_drw_pmu_del, + .start = ali_drw_pmu_start, + .stop = ali_drw_pmu_stop, + .read = ali_drw_pmu_read, + .attr_groups = ali_drw_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&drw_pmu->pmu, name, -1); + if (ret) { + dev_err(drw_pmu->dev, "DRW Driveway PMU PMU register failed!\n"); + ali_drw_pmu_uninit_irq(drw_pmu); + } + + return ret; +} + +static int ali_drw_pmu_remove(struct platform_device *pdev) +{ + struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev); + + /* disable the generation of interrupt by all common counters */ + writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, + drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_DISABLE_CTL); + + ali_drw_pmu_uninit_irq(drw_pmu); + perf_pmu_unregister(&drw_pmu->pmu); + + return 0; +} + +static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ali_drw_pmu_irq *irq; + struct ali_drw_pmu *drw_pmu; + unsigned int target; + int ret; + cpumask_t node_online_cpus; + + irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node); + if (cpu != irq->cpu) + return 0; + + ret = cpumask_and(&node_online_cpus, + cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask); + if (ret) + target = cpumask_any_but(&node_online_cpus, cpu); + else + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target >= nr_cpu_ids) + return 0; + + /* We're only reading, but this isn't the place to be involving RCU */ + mutex_lock(&ali_drw_pmu_irqs_lock); + list_for_each_entry(drw_pmu, &irq->pmus_node, pmus_node) + perf_pmu_migrate_context(&drw_pmu->pmu, irq->cpu, target); + mutex_unlock(&ali_drw_pmu_irqs_lock); + + WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target))); + irq->cpu = target; + + return 0; +} + +/* + * Due to historical reasons, the HID used in the production environment is + * ARMHD700, so we leave ARMHD700 as Compatible ID. + */ +static const struct acpi_device_id ali_drw_acpi_match[] = { + {"BABA5000", 0}, + {"ARMHD700", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, ali_drw_acpi_match); + +static struct platform_driver ali_drw_pmu_driver = { + .driver = { + .name = "ali_drw_pmu", + .acpi_match_table = ali_drw_acpi_match, + }, + .probe = ali_drw_pmu_probe, + .remove = ali_drw_pmu_remove, +}; + +static int __init ali_drw_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "ali_drw_pmu:online", + NULL, ali_drw_pmu_offline_cpu); + + if (ret < 0) { + pr_err("DRW Driveway PMU: setup hotplug failed, ret = %d\n", + ret); + return ret; + } + ali_drw_cpuhp_state_num = ret; + + ret = platform_driver_register(&ali_drw_pmu_driver); + if (ret) + cpuhp_remove_multi_state(ali_drw_cpuhp_state_num); + + return ret; +} + +static void __exit ali_drw_pmu_exit(void) +{ + platform_driver_unregister(&ali_drw_pmu_driver); + cpuhp_remove_multi_state(ali_drw_cpuhp_state_num); +} + +module_init(ali_drw_pmu_init); +module_exit(ali_drw_pmu_exit); + +MODULE_AUTHOR("Hongbo Yao <yaohongbo@linux.alibaba.com>"); +MODULE_AUTHOR("Neng Chen <nengchen@linux.alibaba.com>"); +MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>"); +MODULE_DESCRIPTION("Alibaba DDR Sub-System Driveway PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 59d3980b8ca2..3f07df5a7e95 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -894,7 +894,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags) * pmu::filter_match callback and pmu::event_init group * validation). */ - .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS, + .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS, }; pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b65a7d9640e1..6ce05ef4844d 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -674,9 +674,9 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) static u64 arm_spe_pmsevfr_res0(u16 pmsver) { switch (pmsver) { - case ID_AA64DFR0_PMSVER_8_2: + case ID_AA64DFR0_EL1_PMSVer_IMP: return SYS_PMSEVFR_EL1_RES0_8_2; - case ID_AA64DFR0_PMSVER_8_3: + case ID_AA64DFR0_EL1_PMSVer_V1P1: /* Return the highest version we support in default */ default: return SYS_PMSEVFR_EL1_RES0_8_3; @@ -958,7 +958,7 @@ static void __arm_spe_pmu_dev_probe(void *info) struct device *dev = &spe_pmu->pdev->dev; fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1), - ID_AA64DFR0_PMSVER_SHIFT); + ID_AA64DFR0_EL1_PMSVer_SHIFT); if (!fld) { dev_err(dev, "unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n", |