diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2023-08-16 16:28:26 +0200 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2023-08-23 18:07:28 +0200 |
commit | 7aa7d502e4d5a42353325cb4bf2aa880b10168e9 (patch) | |
tree | be8c3acd23298081b6a3085c0a7e1b2b36a48656 | |
parent | riscv: Require FRAME_POINTER for some configurations (diff) | |
parent | perf: tests: Adapt mmap-basic.c for riscv (diff) | |
download | linux-7aa7d502e4d5a42353325cb4bf2aa880b10168e9.tar.xz linux-7aa7d502e4d5a42353325cb4bf2aa880b10168e9.zip |
Merge patch series "riscv: Allow userspace to directly access perf counters"
Alexandre Ghiti <alexghiti@rivosinc.com> says:
riscv used to allow direct access to cycle/time/instret counters,
bypassing the perf framework, this patchset intends to allow the user to
mmap any counter when accessed through perf.
**Important**: The default mode is now user access through perf only, not
the legacy so some applications will break. However, we introduce a sysctl
perf_user_access like arm64 does, which will allow to switch to the legacy
mode described above.
This version needs openSBI v1.3 *and* a kernel fix that went upstream lately
(https://lore.kernel.org/lkml/20230616114831.3186980-1-maz@kernel.org/T/).
* b4-shazam-merge:
perf: tests: Adapt mmap-basic.c for riscv
tools: lib: perf: Implement riscv mmap support
Documentation: admin-guide: Add riscv sysctl_perf_user_access
drivers: perf: Implement perf event mmap support in the SBI backend
drivers: perf: Implement perf event mmap support in the legacy backend
riscv: Prepare for user-space perf event mmap support
drivers: perf: Rename riscv pmu sbi driver
riscv: Make legacy counter enum match the HW numbering
include: riscv: Fix wrong include guard in riscv_pmu.h
perf: Fix wrong comment about default event_idx
Link: https://lore.kernel.org/r/20230802080328.1213905-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
-rw-r--r-- | Documentation/admin-guide/sysctl/kernel.rst | 27 | ||||
-rw-r--r-- | drivers/perf/riscv_pmu.c | 113 | ||||
-rw-r--r-- | drivers/perf/riscv_pmu_legacy.c | 28 | ||||
-rw-r--r-- | drivers/perf/riscv_pmu_sbi.c | 196 | ||||
-rw-r--r-- | include/linux/perf/riscv_pmu.h | 12 | ||||
-rw-r--r-- | include/linux/perf_event.h | 3 | ||||
-rw-r--r-- | tools/lib/perf/mmap.c | 66 | ||||
-rw-r--r-- | tools/perf/tests/mmap-basic.c | 6 |
8 files changed, 431 insertions, 20 deletions
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 3800fab1619b..8019103aac10 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -941,16 +941,35 @@ enabled, otherwise writing to this file will return ``-EBUSY``. The default value is 8. -perf_user_access (arm64 only) -================================= +perf_user_access (arm64 and riscv only) +======================================= + +Controls user space access for reading perf event counters. -Controls user space access for reading perf event counters. When set to 1, -user space can read performance monitor counter registers directly. +arm64 +===== The default value is 0 (access disabled). +When set to 1, user space can read performance monitor counter registers +directly. + See Documentation/arch/arm64/perf.rst for more information. +riscv +===== + +When set to 0, user space access is disabled. + +The default value is 1, user space can read performance monitor counter +registers through perf, any direct access without perf intervention will trigger +an illegal instruction. + +When set to 2, which enables legacy mode (user space has direct access to cycle +and insret CSRs only). Note that this legacy value is deprecated and will be +removed once all user space applications are fixed. + +Note that the time CSR is always directly accessible to all modes. pid_max ======= diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index ebca5eab9c9b..80c052e93f9e 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -14,9 +14,81 @@ #include <linux/perf/riscv_pmu.h> #include <linux/printk.h> #include <linux/smp.h> +#include <linux/sched_clock.h> #include <asm/sbi.h> +static bool riscv_perf_user_access(struct perf_event *event) +{ + return ((event->attr.type == PERF_TYPE_HARDWARE) || + (event->attr.type == PERF_TYPE_HW_CACHE) || + (event->attr.type == PERF_TYPE_RAW)) && + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); +} + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + struct clock_read_data *rd; + unsigned int seq; + u64 ns; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; + userpg->cap_user_rdpmc = riscv_perf_user_access(event); + +#ifdef CONFIG_RISCV_PMU + /* + * The counters are 64-bit but the priv spec doesn't mandate all the + * bits to be implemented: that's why, counter width can vary based on + * the cpu vendor. + */ + if (userpg->cap_user_rdpmc) + userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1; +#endif + + do { + rd = sched_clock_read_begin(&seq); + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; + + /* + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. + */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; + + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (userpg->time_shift == 32) { + userpg->time_shift = 31; + userpg->time_mult >>= 1; + } + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; +} + static unsigned long csr_read_num(int csr_num) { #define switchcase_csr_read(__csr_num, __val) {\ @@ -171,6 +243,8 @@ int riscv_pmu_event_set_period(struct perf_event *event) local64_set(&hwc->prev_count, (u64)-left); + perf_event_update_userpage(event); + return overflow; } @@ -267,6 +341,9 @@ static int riscv_pmu_event_init(struct perf_event *event) hwc->idx = -1; hwc->event_base = mapped_event; + if (rvpmu->event_init) + rvpmu->event_init(event); + if (!is_sampling_event(event)) { /* * For non-sampling runs, limit the sample_period to half @@ -283,6 +360,39 @@ static int riscv_pmu_event_init(struct perf_event *event) return 0; } +static int riscv_pmu_event_idx(struct perf_event *event) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) + return 0; + + if (rvpmu->csr_index) + return rvpmu->csr_index(event) + 1; + + return 0; +} + +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (rvpmu->event_mapped) { + rvpmu->event_mapped(event, mm); + perf_event_update_userpage(event); + } +} + +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (rvpmu->event_unmapped) { + rvpmu->event_unmapped(event, mm); + perf_event_update_userpage(event); + } +} + struct riscv_pmu *riscv_pmu_alloc(void) { struct riscv_pmu *pmu; @@ -307,6 +417,9 @@ struct riscv_pmu *riscv_pmu_alloc(void) } pmu->pmu = (struct pmu) { .event_init = riscv_pmu_event_init, + .event_mapped = riscv_pmu_event_mapped, + .event_unmapped = riscv_pmu_event_unmapped, + .event_idx = riscv_pmu_event_idx, .add = riscv_pmu_add, .del = riscv_pmu_del, .start = riscv_pmu_start, diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index ca9e20bfc7ac..79fdd667922e 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -13,7 +13,7 @@ #include <linux/platform_device.h> #define RISCV_PMU_LEGACY_CYCLE 0 -#define RISCV_PMU_LEGACY_INSTRET 1 +#define RISCV_PMU_LEGACY_INSTRET 2 static bool pmu_init_done; @@ -71,6 +71,29 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival) local64_set(&hwc->prev_count, initial_val); } +static uint8_t pmu_legacy_csr_index(struct perf_event *event) +{ + return event->hw.idx; +} + +static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm) +{ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && + event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) + return; + + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; +} + +static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm) +{ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && + event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) + return; + + event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT; +} + /* * This is just a simple implementation to allow legacy implementations * compatible with new RISC-V PMU driver framework. @@ -91,6 +114,9 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->ctr_get_width = NULL; pmu->ctr_clear_idx = NULL; pmu->ctr_read = pmu_legacy_read_ctr; + pmu->event_mapped = pmu_legacy_event_mapped; + pmu->event_unmapped = pmu_legacy_event_unmapped; + pmu->csr_index = pmu_legacy_csr_index; perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); } diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 4163ff517471..9a51053b1f99 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -24,6 +24,14 @@ #include <asm/sbi.h> #include <asm/hwcap.h> +#define SYSCTL_NO_USER_ACCESS 0 +#define SYSCTL_USER_ACCESS 1 +#define SYSCTL_LEGACY 2 + +#define PERF_EVENT_FLAG_NO_USER_ACCESS BIT(SYSCTL_NO_USER_ACCESS) +#define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS) +#define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY) + PMU_FORMAT_ATTR(event, "config:0-47"); PMU_FORMAT_ATTR(firmware, "config:63"); @@ -43,6 +51,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = { NULL, }; +/* Allow user mode access by default */ +static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS; + /* * RISC-V doesn't have heterogeneous harts yet. This need to be part of * per_cpu in case of harts with different pmu counters @@ -301,6 +312,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr) } EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info); +static uint8_t pmu_sbi_csr_index(struct perf_event *event) +{ + return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE; +} + static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event) { unsigned long cflags = 0; @@ -329,18 +345,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); struct sbiret ret; int idx; - uint64_t cbase = 0; + uint64_t cbase = 0, cmask = rvpmu->cmask; unsigned long cflags = 0; cflags = pmu_sbi_get_filter_flags(event); + + /* + * In legacy mode, we have to force the fixed counters for those events + * but not in the user access mode as we want to use the other counters + * that support sampling/filtering. + */ + if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { + cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; + cmask = 1; + } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) { + cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; + cmask = 1UL << (CSR_INSTRET - CSR_CYCLE); + } + } + /* retrieve the available counter index */ #if defined(CONFIG_32BIT) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, - rvpmu->cmask, cflags, hwc->event_base, hwc->config, + cmask, cflags, hwc->event_base, hwc->config, hwc->config >> 32); #else ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, - rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0); + cmask, cflags, hwc->event_base, hwc->config, 0); #endif if (ret.error) { pr_debug("Not able to find a counter for event %lx config %llx\n", @@ -474,6 +506,22 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event) return val; } +static void pmu_sbi_set_scounteren(void *arg) +{ + struct perf_event *event = (struct perf_event *)arg; + + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); +} + +static void pmu_sbi_reset_scounteren(void *arg) +{ + struct perf_event *event = (struct perf_event *)arg; + + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); +} + static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) { struct sbiret ret; @@ -490,6 +538,10 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED)) pr_err("Starting counter idx %d failed with error %d\n", hwc->idx, sbi_err_map_linux_errno(ret.error)); + + if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && + (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) + pmu_sbi_set_scounteren((void *)event); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -497,6 +549,10 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) struct sbiret ret; struct hw_perf_event *hwc = &event->hw; + if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && + (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) + pmu_sbi_reset_scounteren((void *)event); + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && flag != SBI_PMU_STOP_FLAG_RESET) @@ -704,10 +760,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); /* - * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace, - * as is necessary to maintain uABI compatibility. + * We keep enabling userspace access to CYCLE, TIME and INSTRET via the + * legacy option but that will be removed in the future. */ - csr_write(CSR_SCOUNTEREN, 0x7); + if (sysctl_perf_user_access == SYSCTL_LEGACY) + csr_write(CSR_SCOUNTEREN, 0x7); + else + csr_write(CSR_SCOUNTEREN, 0x2); /* Stop all the counters so that they can be enabled from perf */ pmu_sbi_stop_all(pmu); @@ -838,6 +897,121 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu) cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); } +static void pmu_sbi_event_init(struct perf_event *event) +{ + /* + * The permissions are set at event_init so that we do not depend + * on the sysctl value that can change. + */ + if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS) + event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS; + else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS) + event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS; + else + event->hw.flags |= PERF_EVENT_FLAG_LEGACY; +} + +static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm) +{ + if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS) + return; + + if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) { + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && + event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) { + return; + } + } + + /* + * The user mmapped the event to directly access it: this is where + * we determine based on sysctl_perf_user_access if we grant userspace + * the direct access to this event. That means that within the same + * task, some events may be directly accessible and some other may not, + * if the user changes the value of sysctl_perf_user_accesss in the + * meantime. + */ + + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; + + /* + * We must enable userspace access *before* advertising in the user page + * that it is possible to do so to avoid any race. + * And we must notify all cpus here because threads that currently run + * on other cpus will try to directly access the counter too without + * calling pmu_sbi_ctr_start. + */ + if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS) + on_each_cpu_mask(mm_cpumask(mm), + pmu_sbi_set_scounteren, (void *)event, 1); +} + +static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm) +{ + if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS) + return; + + if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) { + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && + event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) { + return; + } + } + + /* + * Here we can directly remove user access since the user does not have + * access to the user page anymore so we avoid the racy window where the + * user could have read cap_user_rdpmc to true right before we disable + * it. + */ + event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT; + + if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS) + on_each_cpu_mask(mm_cpumask(mm), + pmu_sbi_reset_scounteren, (void *)event, 1); +} + +static void riscv_pmu_update_counter_access(void *info) +{ + if (sysctl_perf_user_access == SYSCTL_LEGACY) + csr_write(CSR_SCOUNTEREN, 0x7); + else + csr_write(CSR_SCOUNTEREN, 0x2); +} + +static int riscv_pmu_proc_user_access_handler(struct ctl_table *table, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + int prev = sysctl_perf_user_access; + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + /* + * Test against the previous value since we clear SCOUNTEREN when + * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should + * not do that if that was already the case. + */ + if (ret || !write || prev == sysctl_perf_user_access) + return ret; + + on_each_cpu(riscv_pmu_update_counter_access, NULL, 1); + + return 0; +} + +static struct ctl_table sbi_pmu_sysctl_table[] = { + { + .procname = "perf_user_access", + .data = &sysctl_perf_user_access, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = riscv_pmu_proc_user_access_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { } +}; + static int pmu_sbi_device_probe(struct platform_device *pdev) { struct riscv_pmu *pmu = NULL; @@ -881,6 +1055,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) pmu->ctr_get_width = pmu_sbi_ctr_get_width; pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx; pmu->ctr_read = pmu_sbi_ctr_read; + pmu->event_init = pmu_sbi_event_init; + pmu->event_mapped = pmu_sbi_event_mapped; + pmu->event_unmapped = pmu_sbi_event_unmapped; + pmu->csr_index = pmu_sbi_csr_index; ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); if (ret) @@ -894,6 +1072,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + register_sysctl("kernel", sbi_pmu_sysctl_table); + return 0; out_unregister: @@ -907,7 +1087,7 @@ out_free: static struct platform_driver pmu_sbi_driver = { .probe = pmu_sbi_device_probe, .driver = { - .name = RISCV_PMU_PDEV_NAME, + .name = RISCV_PMU_SBI_PDEV_NAME, }, }; @@ -934,7 +1114,7 @@ static int __init pmu_sbi_devinit(void) if (ret) return ret; - pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0); + pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0); if (IS_ERR(pdev)) { platform_driver_unregister(&pmu_sbi_driver); return PTR_ERR(pdev); diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 43fc892aa7d9..43282e22ebe1 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -6,8 +6,8 @@ * */ -#ifndef _ASM_RISCV_PERF_EVENT_H -#define _ASM_RISCV_PERF_EVENT_H +#ifndef _RISCV_PMU_H +#define _RISCV_PMU_H #include <linux/perf_event.h> #include <linux/ptrace.h> @@ -21,7 +21,7 @@ #define RISCV_MAX_COUNTERS 64 #define RISCV_OP_UNSUPP (-EOPNOTSUPP) -#define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi" #define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" #define RISCV_PMU_STOP_FLAG_RESET 1 @@ -55,6 +55,10 @@ struct riscv_pmu { void (*ctr_start)(struct perf_event *event, u64 init_val); void (*ctr_stop)(struct perf_event *event, unsigned long flag); int (*event_map)(struct perf_event *event, u64 *config); + void (*event_init)(struct perf_event *event); + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); + uint8_t (*csr_index)(struct perf_event *event); struct cpu_hw_events __percpu *hw_events; struct hlist_node node; @@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); #endif /* CONFIG_RISCV_PMU */ -#endif /* _ASM_RISCV_PERF_EVENT_H */ +#endif /* _RISCV_PMU_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2166a69e3bf2..1269c96bc3b6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -445,7 +445,8 @@ struct pmu { /* * Will return the value for perf_event_mmap_page::index for this event, - * if no implementation is provided it will default to: event->hw.idx + 1. + * if no implementation is provided it will default to 0 (see + * perf_event_idx_default). */ int (*event_idx) (struct perf_event *event); /*optional */ diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0d1634cedf44..2184814b37dd 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -392,6 +392,72 @@ static u64 read_perf_counter(unsigned int counter) static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } +/* __riscv_xlen contains the witdh of the native base integer, here 64-bit */ +#elif defined(__riscv) && __riscv_xlen == 64 + +/* TODO: implement rv32 support */ + +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) : ); \ + __v; \ +}) + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default: + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static u64 read_perf_counter(unsigned int counter) +{ + return csr_read_num(CSR_CYCLE + counter); +} + +static u64 read_timestamp(void) +{ + return csr_read_num(CSR_TIME); +} + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index e68ca6229756..886a13a77a16 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -284,7 +284,8 @@ static struct test_case tests__basic_mmap[] = { "permissions"), TEST_CASE_REASON("User space counter reading of instructions", mmap_user_read_instr, -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) "permissions" #else "unsupported" @@ -292,7 +293,8 @@ static struct test_case tests__basic_mmap[] = { ), TEST_CASE_REASON("User space counter reading of cycles", mmap_user_read_cycles, -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) "permissions" #else "unsupported" |