From e8cde32f111f7f5681a7bad3ec747e9e697569a9 Mon Sep 17 00:00:00 2001
From: Nianyao Tang
Date: Tue, 11 Jun 2024 12:20:49 +0000
Subject: arm64/cpufeatures/kvm: Add ARMv8.9 FEAT_ECBHB bits in ID_AA64MMFR1 register

Enable the ECBHB bits in the ID_AA64MMFR1 register, as per the ARM DDI
0487K.a specification.

When a guest OS reads ID_AA64MMFR1_EL1, KVM emulates this register using
ftr_id_aa64mmfr1 and always returns ID_AA64MMFR1_EL1.ECBHB=0 to the guest.
As a result, guest syscalls jump to the tramp ventry, which is not needed
on implementations with ID_AA64MMFR1_EL1.ECBHB=1. Let's make the guest
syscall path the same as the host's.

Signed-off-by: Nianyao Tang
Link: https://lore.kernel.org/r/20240611122049.2758600-1-tangnianyao@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/cpufeature.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 48e7029f1054..c3913c90797e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -429,6 +429,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 };

 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
-- 
cgit v1.2.3

From 7647e2b109f4d508fcb35bb8089a27c4fdd81f61 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Fri, 3 May 2024 17:18:46 +0000
Subject: arm64/arch_timer: include <linux/percpu.h>

arch_timer.h includes linux/smp.h since the commit:

  6acc71ccac7187fc ("arm64: arch_timer: Allows a CPU-specific erratum to only affect a subset of CPUs")

It was included to use DEFINE_PER_CPU(), etc. But it should have included
<linux/percpu.h> rather than <linux/smp.h>. It worked because smp.h
includes percpu.h. The next commit will remove percpu.h from smp.h, which
will break this usage.

Explicitly include <linux/percpu.h> and remove <linux/smp.h>.

Signed-off-by: Puranjay Mohan
Acked-by: Mark Rutland
Reviewed-by: Stephen Boyd
Reviewed-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20240503171847.68267-1-puranjay@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/arch_timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 934c658ee947..f5794d50f51d 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -15,7 +15,7 @@
 #include
 #include
 #include
-#include <linux/smp.h>
+#include <linux/percpu.h>
 #include
 #include
-- 
cgit v1.2.3

From bf0baa5bbdc9b99ea081d360f245e5f96e835612 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Fri, 3 May 2024 17:18:47 +0000
Subject: arm64: implement raw_smp_processor_id() using thread_info

Historically, arm64 implemented raw_smp_processor_id() as a read of
current_thread_info()->cpu. This changed when arm64 moved thread_info into
task struct, as at the time CONFIG_THREAD_INFO_IN_TASK made core code use
thread_struct::cpu for the CPU number, and header dependencies prevented
using this in raw_smp_processor_id().
As a workaround, we moved to using a percpu variable in commit:

  57c82954e77fa12c ("arm64: make cpu number a percpu variable")

Since then, thread_info::cpu was reintroduced, and core code was made to
use it in commits:

  001430c1910df65a ("arm64: add CPU field to struct thread_info")
  bcf9033e5449bdca ("sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y")

Consequently, it is possible to use current_thread_info()->cpu again.
This decreases the number of emitted instructions, as in the following
example:

  Dump of assembler code for function bpf_get_smp_processor_id:
     0xffff8000802cd608 <+0>:  nop
     0xffff8000802cd60c <+4>:  nop
     0xffff8000802cd610 <+8>:  adrp   x0, 0xffff800082138000
     0xffff8000802cd614 <+12>: mrs    x1, tpidr_el1
     0xffff8000802cd618 <+16>: add    x0, x0, #0x8
     0xffff8000802cd61c <+20>: ldrsw  x0, [x0, x1]
     0xffff8000802cd620 <+24>: ret

After this patch:

  Dump of assembler code for function bpf_get_smp_processor_id:
     0xffff8000802c9130 <+0>:  nop
     0xffff8000802c9134 <+4>:  nop
     0xffff8000802c9138 <+8>:  mrs    x0, sp_el0
     0xffff8000802c913c <+12>: ldr    w0, [x0, #24]
     0xffff8000802c9140 <+16>: ret

A microbenchmark[1] was built to measure the performance improvement
provided by this change. It calls the following function a given number
of times and measures the runtime overhead:

  static noinline int get_cpu_id(void)
  {
          return smp_processor_id();
  }

Run the benchmark like:

  modprobe smp_processor_id nr_function_calls=1000000000

  +--------+-----------------+----------------------+
  |        | Number of Calls | Time taken           |
  +--------+-----------------+----------------------+
  | Before | 1000000000      | 1602888401ns         |
  +--------+-----------------+----------------------+
  | After  | 1000000000      | 1206212658ns         |
  +--------+-----------------+----------------------+
  | Difference (decrease)    | 396675743ns (24.74%) |
  +--------------------------+----------------------+

Remove the percpu variable cpu_number, as it is now used only in
set_smp_ipi_range() as a dummy variable to be passed to ipi_handler().
Use irq_stat in its place, as arm32 does.

[1] https://github.com/puranjaymohan/linux/commit/77d3fdd

Signed-off-by: Puranjay Mohan
Acked-by: Mark Rutland
Reviewed-by: Stephen Boyd
Link: https://lore.kernel.org/r/20240503171847.68267-2-puranjay@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/smp.h | 13 +------------
 arch/arm64/kernel/smp.c      |  9 ++-------
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index efb13112b408..2510eec026f7 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -25,22 +25,11 @@

 #ifndef __ASSEMBLY__

-#include
-
 #include
 #include
 #include

-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
-
-/*
- * We don't use this_cpu_read(cpu_number) as that has implicit writes to
- * preempt_count, and associated (compiler) barriers, that we'd like to avoid
- * the expense of. If we're preemptible, the value can be stale at use anyway.
- * And we can't use this_cpu_ptr() either, as that winds up recursing back
- * here under CONFIG_DEBUG_PREEMPT=y.
- */
-#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
+#define raw_smp_processor_id() (current_thread_info()->cpu)

 /*
  * Logical CPU mapping.
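The microbenchmark referenced above as [1] lives out of tree. For readers who
want to reproduce the numbers, a hypothetical minimal reconstruction is
sketched below: the nr_function_calls parameter name comes from the commit
message, while everything else (structure, output format) is assumed rather
than taken from the actual module:

  // SPDX-License-Identifier: GPL-2.0
  /* Sketch of a smp_processor_id() microbenchmark module. */
  #include <linux/init.h>
  #include <linux/ktime.h>
  #include <linux/module.h>
  #include <linux/smp.h>

  static unsigned long nr_function_calls = 1000000;
  module_param(nr_function_calls, ulong, 0444);
  MODULE_PARM_DESC(nr_function_calls, "number of calls to time");

  /* noinline so each iteration pays the real call + read cost */
  static noinline int get_cpu_id(void)
  {
  	return smp_processor_id();
  }

  static int __init bench_init(void)
  {
  	unsigned long i, sum = 0;
  	ktime_t start, end;

  	start = ktime_get();
  	for (i = 0; i < nr_function_calls; i++)
  		sum += get_cpu_id();	/* accumulate so calls aren't elided */
  	end = ktime_get();

  	pr_info("%lu calls took %lldns (sum=%lu)\n", nr_function_calls,
  		ktime_to_ns(ktime_sub(end, start)), sum);
  	return 0;
  }
  module_init(bench_init);

  static void __exit bench_exit(void) { }
  module_exit(bench_exit);

  MODULE_DESCRIPTION("smp_processor_id() microbenchmark sketch");
  MODULE_LICENSE("GPL");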
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 31c8b3094dd7..753360ce72f6 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -55,9 +55,6 @@
 #include

-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -749,8 +746,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 */
 	for_each_possible_cpu(cpu) {

-		per_cpu(cpu_number, cpu) = cpu;
-
 		if (cpu == smp_processor_id())
 			continue;

@@ -1028,12 +1023,12 @@ void __init set_smp_ipi_range(int ipi_base, int n)
 		if (ipi_should_be_nmi(i)) {
 			err = request_percpu_nmi(ipi_base + i, ipi_handler,
-						 "IPI", &cpu_number);
+						 "IPI", &irq_stat);
 			WARN(err, "Could not request IPI %d as NMI, err=%d\n",
 			     i, err);
 		} else {
 			err = request_percpu_irq(ipi_base + i, ipi_handler,
-						 "IPI", &cpu_number);
+						 "IPI", &irq_stat);
 			WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
 			     i, err);
 		}
-- 
cgit v1.2.3

From d5859510d35d8e7d63fed5169f1775317f40fb03 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 21 May 2024 19:18:26 +0100
Subject: kselftest/arm64: Include kernel mode NEON in fp-stress

Currently fp-stress only covers userspace use of floating point; it does
not cover any kernel mode uses. Since kernel mode floating point usage
currently can't be preempted and there are explicit preemption points in
the existing implementations, this isn't so important for fp-stress, but
when we re-add preemption it will be good to try to exercise it.

When the arm64 accelerated crypto operations are implemented, we can
relatively straightforwardly trigger kernel mode floating point usage by
using the crypto userspace API to hash data, using the splice() support
in an effort to minimise copying. We use /proc/crypto to check which
accelerated implementations are available, picking the first symmetric
hash we find.

We run the kernel mode test unconditionally, replacing the second copy of
the FPSIMD testcase for systems with FPSIMD only. If we don't think there
are any suitable kernel mode implementations, we fall back to running
another copy of fpsimd-stress.

There are a number of issues with this approach: we don't actually verify
that we are using an accelerated (or even CPU) implementation of the
algorithm being tested, and even with attempting to use splice() to
minimise copying there are sizing limits on how much data gets spliced
at once.
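For background on the AF_ALG plumbing the new testcase relies on, here is a
minimal sketch of hashing a buffer through the crypto userspace API, without
the /proc/crypto scan or the vmsplice()/splice() zero-copy path used by
kernel-test.c. Error handling is elided and "sha256" is assumed to be
available; none of this is the selftest code itself:

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/socket.h>
  #include <linux/if_alg.h>

  int main(void)
  {
  	struct sockaddr_alg sa = {
  		.salg_family = AF_ALG,
  		.salg_type   = "hash",
  		.salg_name   = "sha256",	/* assumed present */
  	};
  	char buf[4096] = { 0 };
  	unsigned char digest[32];		/* sha256 digest size */
  	int tfm, op;

  	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
  	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
  	op = accept(tfm, NULL, 0);

  	/* One full message in, one digest back */
  	send(op, buf, sizeof(buf), 0);
  	recv(op, digest, sizeof(digest), 0);

  	for (size_t i = 0; i < sizeof(digest); i++)
  		printf("%02x", digest[i]);
  	printf("\n");

  	close(op);
  	close(tfm);
  	return 0;
  }

The splice()-based path in the real test exists purely to exercise the
kernel-side FP code with minimal copying; the send()/recv() form above is
the simplest correct usage of the interface.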
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240521-arm64-fp-stress-kernel-v1-1-e38f107baad4@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/.gitignore | 1 + tools/testing/selftests/arm64/fp/Makefile | 1 + tools/testing/selftests/arm64/fp/fp-stress.c | 26 +- tools/testing/selftests/arm64/fp/kernel-test.c | 324 +++++++++++++++++++++++++ 4 files changed, 343 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/arm64/fp/kernel-test.c diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index 00e52c966281..8362e7ec35ad 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -2,6 +2,7 @@ fp-pidbench fp-ptrace fp-stress fpsimd-test +kernel-test rdvl-sme rdvl-sve sve-probe-vls diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 55d4f00d9e8e..d171021e4cdd 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -12,6 +12,7 @@ TEST_GEN_PROGS := \ vec-syscfg \ za-fork za-ptrace TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ + kernel-test \ rdvl-sme rdvl-sve \ sve-test \ ssve-test \ diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index dd31647b00a2..faac24bdefeb 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -319,6 +319,19 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy) ksft_print_msg("Started %s\n", child->name); } +static void start_kernel(struct child_data *child, int cpu, int copy) +{ + int ret; + + ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + child_start(child, "./kernel-test"); + + ksft_print_msg("Started %s\n", child->name); +} + static void start_sve(struct child_data *child, int vl, int cpu) { int ret; @@ -438,7 +451,7 @@ int main(int argc, char **argv) int ret; int timeout = 10; int cpus, i, j, c; - int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + int sve_vl_count, sme_vl_count; bool all_children_started = false; int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; @@ -482,12 +495,7 @@ int main(int argc, char **argv) have_sme2 = false; } - /* Force context switching if we only have FPSIMD */ - if (!sve_vl_count && !sme_vl_count) - fpsimd_per_cpu = 2; - else - fpsimd_per_cpu = 1; - tests += cpus * fpsimd_per_cpu; + tests += cpus * 2; ksft_print_header(); ksft_set_plan(tests); @@ -542,8 +550,8 @@ int main(int argc, char **argv) tests); for (i = 0; i < cpus; i++) { - for (j = 0; j < fpsimd_per_cpu; j++) - start_fpsimd(&children[num_children++], i, j); + start_fpsimd(&children[num_children++], i, 0); + start_kernel(&children[num_children++], i, 0); for (j = 0; j < sve_vl_count; j++) start_sve(&children[num_children++], sve_vls[j], i); diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c new file mode 100644 index 000000000000..50db26a3ed79 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Limited. 
+ */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define DATA_SIZE (16 * 4096) + +static int base, sock; + +static int digest_len; +static char *ref; +static char *digest; +static char *alg_name; + +static struct iovec data_iov; +static int zerocopy[2]; +static int sigs; +static int iter; + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + printf("Terminated by signal %d, iterations=%d, signals=%d\n", + sig, iter, sigs); + exit(0); +} + +static void handle_kick_signal(int sig, siginfo_t *info, void *context) +{ + sigs++; +} + +static char *drivers[] = { + "crct10dif-arm64-ce", + /* "crct10dif-arm64-neon", - Same priority as generic */ + "sha1-ce", + "sha224-arm64", + "sha224-arm64-neon", + "sha224-ce", + "sha256-arm64", + "sha256-arm64-neon", + "sha256-ce", + "sha384-ce", + "sha512-ce", + "sha3-224-ce", + "sha3-256-ce", + "sha3-384-ce", + "sha3-512-ce", + "sm3-ce", + "sm3-neon", +}; + +static bool create_socket(void) +{ + FILE *proc; + struct sockaddr_alg addr; + char buf[1024]; + char *c, *driver_name; + bool is_shash, match; + int ret, i; + + ret = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (ret < 0) { + if (errno == EAFNOSUPPORT) { + printf("AF_ALG not supported\n"); + return false; + } + + printf("Failed to create AF_ALG socket: %s (%d)\n", + strerror(errno), errno); + return false; + } + base = ret; + + memset(&addr, 0, sizeof(addr)); + addr.salg_family = AF_ALG; + strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type)); + + proc = fopen("/proc/crypto", "r"); + if (!proc) { + printf("Unable to open /proc/crypto\n"); + return false; + } + + driver_name = NULL; + is_shash = false; + match = false; + + /* Look through /proc/crypto for a driver with kernel mode FP usage */ + while (!match) { + c = fgets(buf, sizeof(buf), proc); + if (!c) { + if (feof(proc)) { + printf("Nothing found in /proc/crypto\n"); + return false; + } + continue; + } + + /* Algorithm descriptions are separated by a blank line */ + if (*c == '\n') { + if (is_shash && driver_name) { + for (i = 0; i < ARRAY_SIZE(drivers); i++) { + if (strcmp(drivers[i], + driver_name) == 0) { + match = true; + } + } + } + + if (!match) { + digest_len = 0; + + free(driver_name); + driver_name = NULL; + + free(alg_name); + alg_name = NULL; + + is_shash = false; + } + continue; + } + + /* Remove trailing newline */ + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + /* Find the field/value separator and start of the value */ + c = strchr(buf, ':'); + if (!c) + continue; + c += 2; + + if (strncmp(buf, "digestsize", strlen("digestsize")) == 0) + sscanf(c, "%d", &digest_len); + + if (strncmp(buf, "name", strlen("name")) == 0) + alg_name = strdup(c); + + if (strncmp(buf, "driver", strlen("driver")) == 0) + driver_name = strdup(c); + + if (strncmp(buf, "type", strlen("type")) == 0) + if (strncmp(c, "shash", strlen("shash")) == 0) + is_shash = true; + } + + strncpy((char *)addr.salg_name, alg_name, + sizeof(addr.salg_name) - 1); + + ret = bind(base, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + printf("Failed to bind %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + ret = accept(base, NULL, 0); + if (ret < 0) { + printf("Failed to accept %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + sock = ret; + + ret = pipe(zerocopy); + if (ret != 0) { + printf("Failed to create zerocopy pipe: %s (%d)\n", + strerror(errno), errno); + return false; + 
} + + ref = malloc(digest_len); + if (!ref) { + printf("Failed to allocated %d byte reference\n", digest_len); + return false; + } + + digest = malloc(digest_len); + if (!digest) { + printf("Failed to allocated %d byte digest\n", digest_len); + return false; + } + + return true; +} + +static bool compute_digest(void *buf) +{ + struct iovec iov; + int ret, wrote; + + iov = data_iov; + while (iov.iov_len) { + ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT); + if (ret < 0) { + printf("Failed to send buffer: %s (%d)\n", + strerror(errno), errno); + return false; + } + + wrote = ret; + ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0); + if (ret < 0) { + printf("Failed to splice buffer: %s (%d)\n", + strerror(errno), errno); + } else if (ret != wrote) { + printf("Short splice: %d < %d\n", ret, wrote); + } + + iov.iov_len -= wrote; + iov.iov_base += wrote; + } + +reread: + ret = recv(sock, buf, digest_len, 0); + if (ret == 0) { + printf("No disgest returned\n"); + return false; + } + if (ret != digest_len) { + if (errno == -EAGAIN) + goto reread; + printf("Failed to get digest: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return true; +} + +int main(void) +{ + char *data; + struct sigaction sa; + int ret; + + /* Ensure we have unbuffered output */ + setvbuf(stdout, NULL, _IOLBF, 0); + + /* The parent will communicate with us via signals */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + + sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR2, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR2 handler: %s (%d)\n", + strerror(errno), errno); + + data = malloc(DATA_SIZE); + if (!data) { + printf("Failed to allocate data buffer\n"); + return EXIT_FAILURE; + } + memset(data, 0, DATA_SIZE); + + data_iov.iov_base = data; + data_iov.iov_len = DATA_SIZE; + + /* + * If we can't create a socket assume it's a lack of system + * support and fall back to a basic FPSIMD test for the + * benefit of fp-stress. + */ + if (!create_socket()) { + execl("./fpsimd-test", "./fpsimd-test", NULL); + printf("Failed to fall back to fspimd-test: %d (%s)\n", + errno, strerror(errno)); + return EXIT_FAILURE; + } + + /* + * Compute a reference digest we hope is repeatable, we do + * this at runtime partly to make it easier to play with + * parameters. + */ + if (!compute_digest(ref)) { + printf("Failed to compute reference digest\n"); + return EXIT_FAILURE; + } + + printf("AF_ALG using %s\n", alg_name); + + while (true) { + if (!compute_digest(digest)) { + printf("Failed to coempute digest, iter=%d\n", iter); + return EXIT_FAILURE; + } + + if (memcmp(ref, digest, digest_len) != 0) { + printf("Digest mismatch, iter=%d\n", iter); + return EXIT_FAILURE; + } + + iter++; + } + + return EXIT_FAILURE; +} -- cgit v1.2.3 From 031d1f20d5db496a4fc980a59a7c6de83b8a5a76 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 5 Jun 2024 17:24:48 +0530 Subject: kselftest/arm64: Fix redundancy of a testcase Currently, we are writing the same value as we read into the TLS register, hence we cannot confirm update of the register, making the testcase "verify_tpidr_one" redundant. Fix this. 
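For reference, the bug being fixed is the classic post-increment pitfall:
`write_val[0] = read_val[0]++` assigns the old value of read_val[0], so the
test wrote back exactly the value it had just read. A standalone
illustration, not the test code itself:

  #include <assert.h>

  int main(void)
  {
  	unsigned long read_val = 42, write_val;

  	write_val = read_val++;		/* buggy: write_val gets the OLD value */
  	assert(write_val == 42);

  	write_val = read_val + 1;	/* fixed: a genuinely different value */
  	assert(write_val != read_val);
  	return 0;
  }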
Signed-off-by: Dev Jain Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240605115448.640717-1-dev.jain@arm.com [catalin.marinas@arm.com: remove the increment style change] Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index abe4d58d731d..4c941270d8de 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -47,7 +47,7 @@ static void test_tpidr(pid_t child) /* ...write a new value.. */ write_iov.iov_len = sizeof(uint64_t); - write_val[0] = read_val[0]++; + write_val[0] = read_val[0] + 1; ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); ksft_test_result(ret == 0, "write_tpidr_one\n"); -- cgit v1.2.3 From 26ca4423604f15930d96088dc5238f29dc11d5bc Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 28 May 2024 15:51:30 -0700 Subject: arm64: mte: Make mte_check_tfsr_*() conditional on KASAN instead of MTE The check in mte_check_tfsr_el1() is only necessary if HW tag based KASAN is enabled. However, we were also executing the check if MTE is enabled and KASAN is enabled at build time but disabled at runtime. This turned out to cause a measurable increase in power consumption on a specific microarchitecture after enabling MTE. Moreover, on the same system, an increase in invalid syscall latency (as measured by [1]) of around 20-30% (depending on the cluster) was observed after enabling MTE; this almost entirely goes away after removing this check. Therefore, make the check conditional on whether KASAN is enabled rather than on whether MTE is enabled. [1] https://lore.kernel.org/all/CAMn1gO4MwRV8bmFJ_SeY5tsYNPn2ZP56LjAhafygjFaKuu5ouw@mail.gmail.com/ Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I22d98d1483dd400a95595946552b769a5a1ad7bd Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20240528225131.3577704-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 91fbd5c8a391..0f84518632b4 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -182,7 +182,7 @@ void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; mte_check_tfsr_el1(); @@ -190,7 +190,7 @@ static inline void mte_check_tfsr_entry(void) static inline void mte_check_tfsr_exit(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; /* -- cgit v1.2.3 From be5a6f238700f38b534456608588723fba96c5ab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Jun 2024 12:18:08 +0100 Subject: arm64: cputype: Add Cortex-X3 definitions Add cputype definitions for Cortex-X3. These will be used for errata detection in subsequent patches. 
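As a quick illustration of how these cputype values compose, the sketch
below restates the MIDR_CPU_MODEL() helper in simplified, standalone form,
following the architected MIDR_EL1 layout (implementer in bits [31:24],
architecture 0xf in bits [19:16], part number in bits [15:4]); treat it as
an approximation of cputype.h rather than a verbatim copy:

  #include <assert.h>
  #include <stdint.h>

  #define MIDR_PARTNUM_SHIFT	4
  #define MIDR_ARCHITECTURE_SHIFT	16
  #define MIDR_IMPLEMENTOR_SHIFT	24
  #define MIDR_CPU_MODEL(imp, partnum)			\
  	(((uint32_t)(imp) << MIDR_IMPLEMENTOR_SHIFT) |	\
  	 (0xfu << MIDR_ARCHITECTURE_SHIFT) |		\
  	 ((uint32_t)(partnum) << MIDR_PARTNUM_SHIFT))

  int main(void)
  {
  	/* ARM Ltd implementer code 0x41 + Cortex-X3 part 0xD4E */
  	assert(MIDR_CPU_MODEL(0x41, 0xD4E) == 0x410fd4e0u);
  	return 0;
  }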
These values can be found in Table A-263 ("MIDR_EL1 bit descriptions") in issue 07 of the Cortex-X3 TRM, which can be found at: https://developer.arm.com/documentation/101593/0102/?lang=en Signed-off-by: Mark Rutland Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20240603111812.1514101-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7b32b99023a2..72fe207403c8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 @@ -162,6 +163,7 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) -- cgit v1.2.3 From add332c40328cf06fe35e4b3cde8ec315c4629e5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Jun 2024 12:18:09 +0100 Subject: arm64: cputype: Add Cortex-A720 definitions Add cputype definitions for Cortex-A720. These will be used for errata detection in subsequent patches. These values can be found in Table A-186 ("MIDR_EL1 bit descriptions") in issue 0002-05 of the Cortex-A720 TRM, which can be found at: https://developer.arm.com/documentation/102530/0002/?lang=en Signed-off-by: Mark Rutland Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20240603111812.1514101-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 72fe207403c8..dcbac1ce6c25 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -88,6 +88,7 @@ #define ARM_CPU_PART_CORTEX_A78C 0xD4B #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 @@ -165,6 +166,7 @@ #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) -- cgit v1.2.3 From fd2ff5f0b320f418288e7a1f919f648fbc8a0dfc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Jun 2024 12:18:10 +0100 Subject: arm64: cputype: Add Cortex-X925 definitions Add cputype definitions for Cortex-X925. 
These will be used for errata detection in subsequent patches. These values can be found in Table A-285 ("MIDR_EL1 bit descriptions") in issue 0001-05 of the Cortex-X925 TRM, which can be found at: https://developer.arm.com/documentation/102807/0001/?lang=en Signed-off-by: Mark Rutland Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20240603111812.1514101-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index dcbac1ce6c25..1cb0704c6163 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -91,6 +91,7 @@ #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -169,6 +170,7 @@ #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- cgit v1.2.3 From ec768766608092087dfb5c1fc45a16a6f524dee2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Jun 2024 12:18:11 +0100 Subject: arm64: errata: Unify speculative SSBS errata logic Cortex-X4 erratum 3194386 and Neoverse-V3 erratum 3312417 are identical, with duplicate Kconfig text and some unsightly ifdeffery. While we try to share code behind CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS, having separate options results in a fair amount of boilerplate code, and this will only get worse as we expand the set of affected CPUs. To reduce this boilerplate, unify the two behind a common Kconfig option. This removes the duplicate text and Kconfig logic, and removes the need for the intermediate ARM64_WORKAROUND_SPECULATIVE_SSBS option. The set of affected CPUs is described as a list so that this can easily be extended. I've used ARM64_ERRATUM_3194386 (matching the Neoverse-V3 erratum ID) as the common option, matching the way we use ARM64_ERRATUM_1319367 to cover Cortex-A57 erratum 1319537 and Cortex-A72 erratum 1319367. Signed-off-by: Mark Rutland Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20240603111812.1514101-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/silicon-errata.rst | 2 +- arch/arm64/Kconfig | 29 ++++------------------------- arch/arm64/include/asm/cpucaps.h | 2 +- arch/arm64/kernel/cpu_errata.c | 8 ++------ arch/arm64/kernel/proton-pack.c | 2 +- 5 files changed, 9 insertions(+), 34 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index eb8af8032c31..59ee2832406c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -158,7 +158,7 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #1619801 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3312417 | +| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d91259ee7b5..fb31ff9151b9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1067,34 +1067,14 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. -config ARM64_WORKAROUND_SPECULATIVE_SSBS - bool - config ARM64_ERRATUM_3194386 - bool "Cortex-X4: 3194386: workaround for MSR SSBS not self-synchronizing" - select ARM64_WORKAROUND_SPECULATIVE_SSBS + bool "Cortex-X4/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" default y help - This option adds the workaround for ARM Cortex-X4 erratum 3194386. + This option adds the workaround for the following errata: - On affected cores "MSR SSBS, #0" instructions may not affect - subsequent speculative instructions, which may permit unexepected - speculative store bypassing. - - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. - - If unsure, say Y. - -config ARM64_ERRATUM_3312417 - bool "Neoverse-V3: 3312417: workaround for MSR SSBS not self-synchronizing" - select ARM64_WORKAROUND_SPECULATIVE_SSBS - default y - help - This option adds the workaround for ARM Neoverse-V3 erratum 3312417. + * ARM Cortex-X4 erratum 3194386 + * ARM Neoverse-V3 erratum 3312417 On affected cores "MSR SSBS, #0" instructions may not affect subsequent speculative instructions, which may permit unexepected @@ -1108,7 +1088,6 @@ config ARM64_ERRATUM_3312417 If unsure, say Y. 
- config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 7529c0263933..a6e5b07b64fd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -59,7 +59,7 @@ cpucap_is_possible(const unsigned int cap) case ARM64_WORKAROUND_REPEAT_TLBI: return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI); case ARM64_WORKAROUND_SPECULATIVE_SSBS: - return IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS); + return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386); } return true; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 828be635e7e1..5fbe14dc607f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -432,14 +432,10 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { }; #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS -static const struct midr_range erratum_spec_ssbs_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 +static const struct midr_range erratum_spec_ssbs_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), -#endif -#ifdef CONFIG_ARM64_ERRATUM_3312417 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), -#endif {} }; #endif @@ -741,7 +737,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), }, #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS +#ifdef CONFIG_ARM64_ERRATUM_3194386 { .desc = "ARM errata 3194386, 3312417", .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS, diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index baca47bd443c..da53722f95d4 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -567,7 +567,7 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) * Mitigate this with an unconditional speculation barrier, as CPUs * could mis-speculate branches and bypass a conditional barrier. */ - if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS)) + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386)) spec_bar(); return SPECTRE_MITIGATED; -- cgit v1.2.3 From 75b3c43eab594bfbd8184ec8ee1a6b820950819a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Jun 2024 12:18:12 +0100 Subject: arm64: errata: Expand speculative SSBS workaround A number of Arm Ltd CPUs suffer from errata whereby an MSR to the SSBS special-purpose register does not affect subsequent speculative instructions, permitting speculative store bypassing for a window of time. We worked around this for Cortex-X4 and Neoverse-V3, in commit: 7187bb7d0b5c7dfa ("arm64: errata: Add workaround for Arm errata 3194386 and 3312417") ... as per their Software Developer Errata Notice (SDEN) documents: * Cortex-X4 SDEN v8.0, erratum 3194386: https://developer.arm.com/documentation/SDEN-2432808/0800/ * Neoverse-V3 SDEN v6.0, erratum 3312417: https://developer.arm.com/documentation/SDEN-2891958/0600/ Since then, similar errata have been published for a number of other Arm Ltd CPUs, for which the mitigation is the same. 
This is described in their respective SDEN documents: * Cortex-A710 SDEN v19.0, errataum 3324338 https://developer.arm.com/documentation/SDEN-1775101/1900/?lang=en * Cortex-A720 SDEN v11.0, erratum 3456091 https://developer.arm.com/documentation/SDEN-2439421/1100/?lang=en * Cortex-X2 SDEN v19.0, erratum 3324338 https://developer.arm.com/documentation/SDEN-1775100/1900/?lang=en * Cortex-X3 SDEN v14.0, erratum 3324335 https://developer.arm.com/documentation/SDEN-2055130/1400/?lang=en * Cortex-X925 SDEN v8.0, erratum 3324334 https://developer.arm.com/documentation/109108/800/?lang=en * Neoverse-N2 SDEN v17.0, erratum 3324339 https://developer.arm.com/documentation/SDEN-1982442/1700/?lang=en * Neoverse-V2 SDEN v9.0, erratum 3324336 https://developer.arm.com/documentation/SDEN-2332927/900/?lang=en Note that due to shared design lineage, some CPUs share the same erratum number. Add these to the existing mitigation under CONFIG_ARM64_ERRATUM_3194386. As listing all of the erratum IDs in the runtime description would be unwieldy, this is reduced to: "SSBS not fully self-synchronizing" ... matching the description of the errata in all of the SDENs. Signed-off-by: Mark Rutland Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20240603111812.1514101-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/silicon-errata.rst | 14 ++++++++++++++ arch/arm64/Kconfig | 9 ++++++++- arch/arm64/kernel/cpu_errata.c | 9 ++++++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 59ee2832406c..bb83c5d8c675 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -132,16 +132,26 @@ stable kernels. 
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-V1     | #1619801        | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Neoverse-V3     | #3312417        | ARM64_ERRATUM_3312417       |
+| ARM            | Neoverse-V3     | #3312417        | ARM64_ERRATUM_3194386       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..fb31ff9151b9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1067,34 +1067,14 @@ config ARM64_ERRATUM_3117295

 	  If unsure, say Y.

-config ARM64_WORKAROUND_SPECULATIVE_SSBS
-	bool
-
 config ARM64_ERRATUM_3194386
-	bool "Cortex-X4: 3194386: workaround for MSR SSBS not self-synchronizing"
-	select ARM64_WORKAROUND_SPECULATIVE_SSBS
+	bool "Cortex-X4/Neoverse-V3: workaround for MSR SSBS not self-synchronizing"
 	default y
 	help
-	  This option adds the workaround for ARM Cortex-X4 erratum 3194386.
+	  This option adds the workaround for the following errata:

-	  On affected cores "MSR SSBS, #0" instructions may not affect
-	  subsequent speculative instructions, which may permit unexepected
-	  speculative store bypassing.
-
-	  Work around this problem by placing a speculation barrier after
-	  kernel changes to SSBS. The presence of the SSBS special-purpose
-	  register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such
-	  that userspace will use the PR_SPEC_STORE_BYPASS prctl to change
-	  SSBS.
-
-	  If unsure, say Y.
-
-config ARM64_ERRATUM_3312417
-	bool "Neoverse-V3: 3312417: workaround for MSR SSBS not self-synchronizing"
-	select ARM64_WORKAROUND_SPECULATIVE_SSBS
-	default y
-	help
-	  This option adds the workaround for ARM Neoverse-V3 erratum 3312417.
+	  * ARM Cortex-X4 erratum 3194386
+	  * ARM Neoverse-V3 erratum 3312417

 	  On affected cores "MSR SSBS, #0" instructions may not affect
 	  subsequent speculative instructions, which may permit unexepected
@@ -1108,7 +1088,6 @@ config ARM64_ERRATUM_3312417

 	  If unsure, say Y.
config ARM64_ERRATUM_3194386 - bool "Cortex-X4/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A710 erratam 3324338 + * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-X2 erratum 3324338 + * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 + * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 On affected cores "MSR SSBS, #0" instructions may not affect diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5fbe14dc607f..617424b73f8c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,8 +434,15 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), {} }; #endif @@ -739,7 +746,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_3194386 { - .desc = "ARM errata 3194386, 3312417", + .desc = "SSBS not fully self-synchronizing", .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS, ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list), }, -- cgit v1.2.3 From f8c6c92790dd305f4609c6d813c95ef125593774 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 5 Jun 2024 14:14:56 +0100 Subject: ACPI: arm64: Sort entries alphabetically Sort the entries in the Makefile alphabetically. Signed-off-by: Sudeep Holla Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20240605131458.3341095-2-sudeep.holla@arm.com Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 726944648c9b..7d7fd6512bca 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_AGDI) += agdi.o -obj-$(CONFIG_ACPI_IORT) += iort.o -obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_APMT) += apmt.o +obj-$(CONFIG_ACPI_GTDT) += gtdt.o +obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o obj-y += thermal_cpufreq.o -- cgit v1.2.3 From 99e7a8adc0ca906151f5d70ff68b8a81f53fd106 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 5 Jun 2024 14:14:57 +0100 Subject: arm64: cpuidle: Move ACPI specific code into drivers/acpi/arm64/ The ACPI cpuidle LPI FFH code can be moved out of arm64 arch code as it just uses SMCCC. 
Move all the ACPI cpuidle LPI FFH code into drivers/acpi/arm64/cpuidle.c Signed-off-by: Sudeep Holla Acked-by: Catalin Marinas Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20240605131458.3341095-3-sudeep.holla@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 1 - arch/arm64/kernel/cpuidle.c | 74 -------------------------------------------- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/cpuidle.c | 70 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 75 deletions(-) delete mode 100644 arch/arm64/kernel/cpuidle.c create mode 100644 drivers/acpi/arm64/cpuidle.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 763824963ed1..2b112f3b7510 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -46,7 +46,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c deleted file mode 100644 index f372295207fb..000000000000 --- a/arch/arm64/kernel/cpuidle.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ARM64 CPU idle arch support - * - * Copyright (C) 2014 ARM Ltd. - * Author: Lorenzo Pieralisi - */ - -#include -#include -#include -#include - -#ifdef CONFIG_ACPI_PROCESSOR_IDLE - -#include - -#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) - -static int psci_acpi_cpu_init_idle(unsigned int cpu) -{ - int i, count; - struct acpi_lpi_state *lpi; - struct acpi_processor *pr = per_cpu(processors, cpu); - - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - for (i = 0; i < count; i++) { - u32 state; - - lpi = &pr->power.lpi_states[i + 1]; - /* - * Only bits[31:0] represent a PSCI power_state while - * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification - */ - state = lpi->address; - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - return -EINVAL; - } - } - - return 0; -} - -int acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return psci_acpi_cpu_init_idle(cpu); -} - -__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) -{ - u32 state = lpi->address; - - if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); - else - return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); -} -#endif diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 7d7fd6512bca..2efee23f00b4 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_ACPI_AGDI) += agdi.o obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_IORT) += iort.o +obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o obj-y += thermal_cpufreq.o diff --git a/drivers/acpi/arm64/cpuidle.c b/drivers/acpi/arm64/cpuidle.c new file mode 100644 index 
000000000000..801f9c450142 --- /dev/null +++ b/drivers/acpi/arm64/cpuidle.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARM64 CPU idle arch support + * + * Copyright (C) 2014 ARM Ltd. + * Author: Lorenzo Pieralisi + */ + +#include +#include +#include +#include +#include + +#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) + +static int psci_acpi_cpu_init_idle(unsigned int cpu) +{ + int i, count; + struct acpi_lpi_state *lpi; + struct acpi_processor *pr = per_cpu(processors, cpu); + + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + count = pr->power.count - 1; + if (count <= 0) + return -ENODEV; + + for (i = 0; i < count; i++) { + u32 state; + + lpi = &pr->power.lpi_states[i + 1]; + /* + * Only bits[31:0] represent a PSCI power_state while + * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification + */ + state = lpi->address; + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + return -EINVAL; + } + } + + return 0; +} + +int acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return psci_acpi_cpu_init_idle(cpu); +} + +__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +{ + u32 state = lpi->address; + + if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) + return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, + lpi->index, state); + else + return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, + lpi->index, state); +} -- cgit v1.2.3 From 7a7a1cac3c2f5e3c859efb295bad3469e09e6f8f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 5 Jun 2024 14:14:58 +0100 Subject: arm64: FFH: Move ACPI specific code into drivers/acpi/arm64/ The ACPI FFH Opregion code can be moved out of arm64 arch code as it just uses SMCCC. 
Move all the ACPI FFH Opregion code into drivers/acpi/arm64/ffh.c Signed-off-by: Sudeep Holla Acked-by: Catalin Marinas Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20240605131458.3341095-4-sudeep.holla@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi.c | 105 ------------------------------------------- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/ffh.c | 107 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 105 deletions(-) create mode 100644 drivers/acpi/arm64/ffh.c diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e0e7b93c16cc..fd1917336d3a 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -422,108 +422,3 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_mark_nomap(addr, size); } - -#ifdef CONFIG_ACPI_FFH -/* - * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as - * specified in https://developer.arm.com/docs/den0048/latest - */ -struct acpi_ffh_data { - struct acpi_ffh_info info; - void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - unsigned long a4, unsigned long a5, - unsigned long a6, unsigned long a7, - struct arm_smccc_res *args, - struct arm_smccc_quirk *res); - void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args, - struct arm_smccc_1_2_regs *res); -}; - -int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt) -{ - enum arm_smccc_conduit conduit; - struct acpi_ffh_data *ffh_ctxt; - - if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2) - return -EOPNOTSUPP; - - conduit = arm_smccc_1_1_get_conduit(); - if (conduit == SMCCC_CONDUIT_NONE) { - pr_err("%s: invalid SMCCC conduit\n", __func__); - return -EOPNOTSUPP; - } - - ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL); - if (!ffh_ctxt) - return -ENOMEM; - - if (conduit == SMCCC_CONDUIT_SMC) { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc; - } else { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc; - } - - memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info)); - - *region_ctxt = ffh_ctxt; - return AE_OK; -} - -static bool acpi_ffh_smccc_owner_allowed(u32 fid) -{ - int owner = ARM_SMCCC_OWNER_NUM(fid); - - if (owner == ARM_SMCCC_OWNER_STANDARD || - owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM) - return true; - - return false; -} - -int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context) -{ - int ret = 0; - struct acpi_ffh_data *ffh_ctxt = region_context; - - if (ffh_ctxt->info.offset == 0) { - /* SMC/HVC 32bit call */ - struct arm_smccc_res res; - u32 a[8] = { 0 }, *ptr = (u32 *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) || - !acpi_ffh_smccc_owner_allowed(*ptr) || - ffh_ctxt->info.length > 32) { - ret = AE_ERROR; - } else { - int idx, len = ffh_ctxt->info.length >> 2; - - for (idx = 0; idx < len; idx++) - a[idx] = *(ptr + idx); - - ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4], - a[5], a[6], a[7], &res, NULL); - memcpy(value, &res, sizeof(res)); - } - - } else if (ffh_ctxt->info.offset == 1) { - /* SMC/HVC 64bit call */ - struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) || - !acpi_ffh_smccc_owner_allowed(r->a0) || - ffh_ctxt->info.length > sizeof(*r)) { - ret = AE_ERROR; - } else { - ffh_ctxt->invoke_ffh64_fn(r, r); - memcpy(value, r, 
ffh_ctxt->info.length); - } - } else { - ret = AE_ERROR; - } - - return ret; -} -#endif /* CONFIG_ACPI_FFH */ diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 2efee23f00b4..05ecde9eaabe 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_AGDI) += agdi.o obj-$(CONFIG_ACPI_APMT) += apmt.o +obj-$(CONFIG_ACPI_FFH) += ffh.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o diff --git a/drivers/acpi/arm64/ffh.c b/drivers/acpi/arm64/ffh.c new file mode 100644 index 000000000000..877edc6557e9 --- /dev/null +++ b/drivers/acpi/arm64/ffh.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +/* + * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as + * specified in https://developer.arm.com/docs/den0048/latest + */ +struct acpi_ffh_data { + struct acpi_ffh_info info; + void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *args, + struct arm_smccc_quirk *res); + void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args, + struct arm_smccc_1_2_regs *res); +}; + +int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt) +{ + enum arm_smccc_conduit conduit; + struct acpi_ffh_data *ffh_ctxt; + + if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2) + return -EOPNOTSUPP; + + conduit = arm_smccc_1_1_get_conduit(); + if (conduit == SMCCC_CONDUIT_NONE) { + pr_err("%s: invalid SMCCC conduit\n", __func__); + return -EOPNOTSUPP; + } + + ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL); + if (!ffh_ctxt) + return -ENOMEM; + + if (conduit == SMCCC_CONDUIT_SMC) { + ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc; + ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc; + } else { + ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc; + ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc; + } + + memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info)); + + *region_ctxt = ffh_ctxt; + return AE_OK; +} + +static bool acpi_ffh_smccc_owner_allowed(u32 fid) +{ + int owner = ARM_SMCCC_OWNER_NUM(fid); + + if (owner == ARM_SMCCC_OWNER_STANDARD || + owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM) + return true; + + return false; +} + +int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context) +{ + int ret = 0; + struct acpi_ffh_data *ffh_ctxt = region_context; + + if (ffh_ctxt->info.offset == 0) { + /* SMC/HVC 32bit call */ + struct arm_smccc_res res; + u32 a[8] = { 0 }, *ptr = (u32 *)value; + + if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) || + !acpi_ffh_smccc_owner_allowed(*ptr) || + ffh_ctxt->info.length > 32) { + ret = AE_ERROR; + } else { + int idx, len = ffh_ctxt->info.length >> 2; + + for (idx = 0; idx < len; idx++) + a[idx] = *(ptr + idx); + + ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4], + a[5], a[6], a[7], &res, NULL); + memcpy(value, &res, sizeof(res)); + } + + } else if (ffh_ctxt->info.offset == 1) { + /* SMC/HVC 64bit call */ + struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value; + + if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) || + !acpi_ffh_smccc_owner_allowed(r->a0) || + ffh_ctxt->info.length > sizeof(*r)) { + ret = AE_ERROR; + } else { + ffh_ctxt->invoke_ffh64_fn(r, r); + memcpy(value, r, ffh_ctxt->info.length); + } + } else { + ret = 
AE_ERROR; + } + + return ret; +} -- cgit v1.2.3 From 963c5d4968220548d013e42e0892b75ca7e790ae Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 13 Jun 2024 08:34:29 +0100 Subject: kselftest/arm64: Fix a couple of spelling mistakes There are two spelling mistakes in some error messages. Fix them. Signed-off-by: Colin Ian King Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240613073429.1797451-1-colin.i.king@gmail.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/kernel-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index 50db26a3ed79..e8da3b4cbd23 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -233,7 +233,7 @@ static bool compute_digest(void *buf) reread: ret = recv(sock, buf, digest_len, 0); if (ret == 0) { - printf("No disgest returned\n"); + printf("No digest returned\n"); return false; } if (ret != digest_len) { @@ -308,7 +308,7 @@ int main(void) while (true) { if (!compute_digest(digest)) { - printf("Failed to coempute digest, iter=%d\n", iter); + printf("Failed to compute digest, iter=%d\n", iter); return EXIT_FAILURE; } -- cgit v1.2.3 From 2a805201f9bdfcc172ac54f7e21843e186389506 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 12 Jun 2024 13:34:56 -0700 Subject: ARM64: reloc_test: add missing MODULE_DESCRIPTION() macro With ARCH=arm64, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/arm64/kernel/arm64-reloc-test.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240612-md-arch-arm64-kernel-v1-1-1fafe8d11df3@quicinc.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/reloc_test_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index 99f2ffe9fc05..5b0891146054 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void) module_init(reloc_test_init); module_exit(reloc_test_exit); +MODULE_DESCRIPTION("Relocation testing module"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From cf63fe35f1efa71a4b09f07abbcdf28144b6c6b6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Thu, 20 Jun 2024 20:40:38 +0300 Subject: arm64: Kconfig: fix typo in __builtin_return_adddress Comment about BUILTIN_RETURN_ADDRESS_STRIPS_PAC spells __builtin_return_adddress with a triple 'd', fix it. 
Signed-off-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20240620174038.3721466-1-rppt@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d91259ee7b5..3f39657af2cb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -381,7 +381,7 @@ config BROKEN_GAS_INST config BUILTIN_RETURN_ADDRESS_STRIPS_PAC bool - # Clang's __builtin_return_adddress() strips the PAC since 12.0.0 + # Clang's __builtin_return_address() strips the PAC since 12.0.0 # https://github.com/llvm/llvm-project/commit/2a96f47c5ffca84cd774ad402cacd137f4bf45e2 default y if CC_IS_CLANG # GCC's __builtin_return_address() strips the PAC since 11.1.0, -- cgit v1.2.3 From 573611145fcb6325a28c462aca3753e257a0b2a6 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 18 Jun 2024 09:17:03 +0530 Subject: arm64/mm: Stop using ESR_ELx_FSC_TYPE during fault Fault status codes at page table levels 0, 1, 2 and 3 for access, permission and translation faults are architecturally organized such that masking out ESR_ELx_FSC_TYPE yields the level 0 status code for the respective fault. Helpers like esr_fsc_is_[translation|permission|access_flag]_fault() mask out ESR_ELx_FSC_TYPE before comparing against the corresponding level 0 status code, because until now the kernel has not cared about the page table level where the fault actually occurred. This scheme starts to crumble with FEAT_LPA2, which adds level -1: the fault status code for a translation fault at level -1 is 0x2B, which does not follow the ESR_ELx_FSC_TYPE pattern and required changes to esr_fsc_is_translation_fault(). Change the above helpers to compare against the individual fault status code values for each page table level and stop using ESR_ELx_FSC_TYPE, which is losing its value as a common mask. Cc: Will Deacon Cc: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Marc Zyngier Reviewed-by: Ryan Roberts Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240618034703.3622510-1-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7abf09df7033..3f482500f71f 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,14 @@ #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ?
ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) + /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) #define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) @@ -388,20 +396,33 @@ static inline bool esr_is_data_abort(unsigned long esr) static inline bool esr_fsc_is_translation_fault(unsigned long esr) { - /* Translation fault, level -1 */ - if ((esr & ESR_ELx_FSC) == 0b101011) - return true; - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); } static inline bool esr_fsc_is_permission_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); } static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); } /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ -- cgit v1.2.3 From 118d777c4cb40f44e7b675955fd2da8b22f83913 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:37 +0100 Subject: wordpart.h: Add REPEAT_BYTE_U32() In some cases it's necessary to replicate a byte across a u32 value, for which REPEAT_BYTE() would be helpful. Currently this requires explicit masking of the result to avoid sparse warnings, as e.g. (u32)REPEAT_BYTE(0xa0) ... will result in a warning: cast truncates bits from constant value (a0a0a0a0a0a0a0a0 becomes a0a0a0a0) Add a new REPEAT_BYTE_U32() which does the necessary masking internally, so that we don't need to duplicate this for every usage. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20240617111841.2529370-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- include/linux/wordpart.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h index 4ca1ba66d2f0..5a7b97bb7c95 100644 --- a/include/linux/wordpart.h +++ b/include/linux/wordpart.h @@ -39,6 +39,14 @@ */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) +/** + * REPEAT_BYTE_U32 - repeat the value @x multiple times as a u32 value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ +#define REPEAT_BYTE_U32(x) lower_32_bits(REPEAT_BYTE(x)) + /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) -- cgit v1.2.3 From e95c64a7fb7185383a0edfd6c21477567bee4c26 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:38 +0100 Subject: irqchip/gic-common: Remove sync_access callback The gic_configure_irq(), gic_dist_config(), and gic_cpu_config() functions each take an optional "sync_access" callback, but in almost all cases this is not used. The only user is the GICv3 driver's gic_cpu_init() function, which uses gic_redist_wait_for_rwp() as the "sync_access" callback for gic_cpu_config().
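For readers less familiar with this corner of the GIC drivers, the pattern being removed is an optional function-pointer parameter that is NULL at almost every call site. A minimal, standalone sketch of that shape — hypothetical names only, not the actual driver code — follows:

    /* Illustrative only: hypothetical names, not the GIC driver itself. */
    #include <stdio.h>

    typedef void (*sync_fn)(void);

    static void wait_for_rwp(void)          /* stand-in for gic_redist_wait_for_rwp() */
    {
            printf("sync\n");
    }

    static void cpu_config(int nr, sync_fn sync_access)
    {
            /* ... program 'nr' interrupt registers ... */
            if (sync_access)                /* nearly every caller passes NULL */
                    sync_access();
    }

    int main(void)
    {
            cpu_config(32, NULL);           /* the common case */
            cpu_config(32, wait_for_rwp);   /* the single GICv3 user */
            return 0;
    }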
It would be simpler and clearer to remove the callback and have the GICv3 driver call gic_redist_wait_for_rwp() explicitly after gic_cpu_config(). Remove the "sync_access" callback, and call gic_redist_wait_for_rwp() explicitly in the GICv3 driver. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- drivers/irqchip/irq-gic-common.c | 16 +++------------- drivers/irqchip/irq-gic-common.h | 7 +++---- drivers/irqchip/irq-gic-v3.c | 7 ++++--- drivers/irqchip/irq-gic.c | 6 +++--- drivers/irqchip/irq-hip04.c | 6 +++--- 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index afd6a1841715..4ed17620dc4d 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -45,7 +45,7 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, } int gic_configure_irq(unsigned int irq, unsigned int type, - void __iomem *base, void (*sync_access)(void)) + void __iomem *base) { u32 confmask = 0x2 << ((irq % 16) * 2); u32 confoff = (irq / 16) * 4; @@ -84,14 +84,10 @@ int gic_configure_irq(unsigned int irq, unsigned int type, raw_spin_unlock_irqrestore(&irq_controller_lock, flags); - if (sync_access) - sync_access(); - return ret; } -void gic_dist_config(void __iomem *base, int gic_irqs, - void (*sync_access)(void)) +void gic_dist_config(void __iomem *base, int gic_irqs) { unsigned int i; @@ -118,12 +114,9 @@ void gic_dist_config(void __iomem *base, int gic_irqs, writel_relaxed(GICD_INT_EN_CLR_X32, base + GIC_DIST_ENABLE_CLEAR + i / 8); } - - if (sync_access) - sync_access(); } -void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void)) +void gic_cpu_config(void __iomem *base, int nr) { int i; @@ -144,7 +137,4 @@ void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void)) for (i = 0; i < nr; i += 4) writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i * 4 / 4); - - if (sync_access) - sync_access(); } diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index f407cce9ecaa..c230175dd584 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -20,10 +20,9 @@ struct gic_quirk { }; int gic_configure_irq(unsigned int irq, unsigned int type, - void __iomem *base, void (*sync_access)(void)); -void gic_dist_config(void __iomem *base, int gic_irqs, - void (*sync_access)(void)); -void gic_cpu_config(void __iomem *base, int nr, void (*sync_access)(void)); + void __iomem *base); +void gic_dist_config(void __iomem *base, int gic_irqs); +void gic_cpu_config(void __iomem *base, int nr); void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void *data); void gic_enable_of_quirks(const struct device_node *np, diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 6fb276504bcc..d95dda2383fb 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -670,7 +670,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) offset = convert_offset_index(d, GICD_ICFGR, &index); - ret = gic_configure_irq(index, type, base + offset, NULL); + ret = gic_configure_irq(index, type, base + offset); if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) { /* Misconfigured 
PPIs are usually not fatal */ pr_warn("GIC: PPI INTID%ld is secure or misconfigured\n", irq); @@ -940,7 +940,7 @@ static void __init gic_dist_init(void) writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i); /* Now do the common stuff */ - gic_dist_config(base, GIC_LINE_NR, NULL); + gic_dist_config(base, GIC_LINE_NR); val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1; if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) { @@ -1282,7 +1282,8 @@ static void gic_cpu_init(void) for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32) writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); - gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp); + gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR); + gic_redist_wait_for_rwp(); /* initialise system registers */ gic_cpu_sys_reg_init(); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 98aa383e39db..87255bde960f 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -303,7 +303,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG, NULL); + ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG); if (ret && gicirq < 32) { /* Misconfigured PPIs are usually not fatal */ pr_warn("GIC: PPI%ld is secure or misconfigured\n", gicirq - 16); @@ -479,7 +479,7 @@ static void gic_dist_init(struct gic_chip_data *gic) for (i = 32; i < gic_irqs; i += 4) writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); - gic_dist_config(base, gic_irqs, NULL); + gic_dist_config(base, gic_irqs); writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL); } @@ -516,7 +516,7 @@ static int gic_cpu_init(struct gic_chip_data *gic) gic_cpu_map[i] &= ~cpu_mask; } - gic_cpu_config(dist_base, 32, NULL); + gic_cpu_config(dist_base, 32); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); gic_cpu_if_up(gic); diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index 46161f6ff289..5285150fd909 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -130,7 +130,7 @@ static int hip04_irq_set_type(struct irq_data *d, unsigned int type) raw_spin_lock(&irq_controller_lock); - ret = gic_configure_irq(irq, type, base + GIC_DIST_CONFIG, NULL); + ret = gic_configure_irq(irq, type, base + GIC_DIST_CONFIG); if (ret && irq < 32) { /* Misconfigured PPIs are usually not fatal */ pr_warn("GIC: PPI%d is secure or misconfigured\n", irq - 16); @@ -260,7 +260,7 @@ static void __init hip04_irq_dist_init(struct hip04_irq_data *intc) for (i = 32; i < nr_irqs; i += 2) writel_relaxed(cpumask, base + GIC_DIST_TARGET + ((i * 2) & ~3)); - gic_dist_config(base, nr_irqs, NULL); + gic_dist_config(base, nr_irqs); writel_relaxed(1, base + GIC_DIST_CTRL); } @@ -287,7 +287,7 @@ static void hip04_irq_cpu_init(struct hip04_irq_data *intc) if (i != cpu) hip04_cpu_map[i] &= ~cpu_mask; - gic_cpu_config(dist_base, 32, NULL); + gic_cpu_config(dist_base, 32); writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); writel_relaxed(1, base + GIC_CPU_CTRL); -- cgit v1.2.3 From a6156e70316b322b61739468e465bfb1345b7ae2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:39 +0100 Subject: irqchip/gic-v3: Make distributor priorities variables In subsequent patches the GICv3 driver will choose the regular interrupt priority at boot time. In preparation for using dynamic priorities, place the priorities in variables and update the code to pass these as parameters. 
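As a refresher before the diff below: REPEAT_BYTE() from wordpart.h replicates a byte across an unsigned long, and the REPEAT_BYTE_U32() helper added two patches earlier truncates that to 32 bits — which, for the default priority byte, is exactly the old GICD_INT_DEF_PRI_X4 value. A standalone userspace sketch of the arithmetic (assuming a 64-bit unsigned long, as on arm64):

    /* Demonstrates the byte-replication macros; definitions mirror wordpart.h. */
    #include <stdio.h>

    #define REPEAT_BYTE(x)     ((~0ul / 0xff) * (x))
    #define lower_32_bits(n)   ((unsigned int)((n) & 0xffffffff))
    #define REPEAT_BYTE_U32(x) lower_32_bits(REPEAT_BYTE(x))

    int main(void)
    {
            /* 0xa0 is GICD_INT_DEF_PRI, the default interrupt priority */
            printf("%lx\n", REPEAT_BYTE(0xa0));     /* prints a0a0a0a0a0a0a0a0 */
            printf("%x\n", REPEAT_BYTE_U32(0xa0));  /* prints a0a0a0a0, the old
                                                       GICD_INT_DEF_PRI_X4 value */
            return 0;
    }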
Users of GICD_INT_DEF_PRI_X4 are modified to replicate the priority byte using REPEAT_BYTE_U32(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- drivers/irqchip/irq-gic-common.c | 10 ++++++---- drivers/irqchip/irq-gic-common.h | 4 ++-- drivers/irqchip/irq-gic-v3-its.c | 11 ++++++----- drivers/irqchip/irq-gic-v3.c | 19 +++++++++++-------- drivers/irqchip/irq-gic.c | 8 ++++---- drivers/irqchip/irq-hip04.c | 4 ++-- include/linux/irqchip/arm-gic-common.h | 4 ---- include/linux/irqchip/arm-gic-v3.h | 2 +- 8 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 4ed17620dc4d..c776f9142610 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "irq-gic-common.h" @@ -87,7 +88,7 @@ int gic_configure_irq(unsigned int irq, unsigned int type, return ret; } -void gic_dist_config(void __iomem *base, int gic_irqs) +void gic_dist_config(void __iomem *base, int gic_irqs, u8 priority) { unsigned int i; @@ -102,7 +103,8 @@ void gic_dist_config(void __iomem *base, int gic_irqs) * Set priority on all global interrupts. */ for (i = 32; i < gic_irqs; i += 4) - writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i); + writel_relaxed(REPEAT_BYTE_U32(priority), + base + GIC_DIST_PRI + i); /* * Deactivate and disable all SPIs. Leave the PPI and SGIs @@ -116,7 +118,7 @@ void gic_dist_config(void __iomem *base, int gic_irqs) } } -void gic_cpu_config(void __iomem *base, int nr) +void gic_cpu_config(void __iomem *base, int nr, u8 priority) { int i; @@ -135,6 +137,6 @@ void gic_cpu_config(void __iomem *base, int nr) * Set priority on PPI and SGI interrupts */ for (i = 0; i < nr; i += 4) - writel_relaxed(GICD_INT_DEF_PRI_X4, + writel_relaxed(REPEAT_BYTE_U32(priority), base + GIC_DIST_PRI + i * 4 / 4); } diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index c230175dd584..e8eab72ef195 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -21,8 +21,8 @@ struct gic_quirk { int gic_configure_irq(unsigned int irq, unsigned int type, void __iomem *base); -void gic_dist_config(void __iomem *base, int gic_irqs); -void gic_cpu_config(void __iomem *base, int nr); +void gic_dist_config(void __iomem *base, int gic_irqs, u8 priority); +void gic_cpu_config(void __iomem *base, int nr, u8 priority); void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void *data); void gic_enable_of_quirks(const struct device_node *np, diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 3c755d5dad6e..42e63272154e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -59,7 +59,7 @@ static u32 lpi_id_bits; #define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K) #define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K) -#define LPI_PROP_DEFAULT_PRIO GICD_INT_DEF_PRI +static u8 __ro_after_init lpi_prop_prio; /* * Collection structure - just an ID, and a redistributor address to @@ -1926,7 +1926,7 @@ static int its_vlpi_unmap(struct irq_data *d) /* and restore the physical one */ irqd_clr_forwarded_to_vcpu(d); its_send_mapti(its_dev, 
d->hwirq, event); - lpi_update_config(d, 0xff, (LPI_PROP_DEFAULT_PRIO | + lpi_update_config(d, 0xff, (lpi_prop_prio | LPI_PROP_ENABLED | LPI_PROP_GROUP1)); @@ -2181,8 +2181,8 @@ static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids) static void gic_reset_prop_table(void *va) { - /* Priority 0xa0, Group-1, disabled */ - memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ); + /* Regular IRQ priority, Group-1, disabled */ + memset(va, lpi_prop_prio | LPI_PROP_GROUP1, LPI_PROPBASE_SZ); /* Make sure the GIC will observe the written configuration */ gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ); @@ -5650,7 +5650,7 @@ int __init its_lpi_memreserve_init(void) } int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, - struct irq_domain *parent_domain) + struct irq_domain *parent_domain, u8 irq_prio) { struct device_node *of_node; struct its_node *its; @@ -5660,6 +5660,7 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, gic_rdists = rdists; + lpi_prop_prio = irq_prio; its_parent = parent_domain; of_node = to_of_node(handle); if (of_node) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d95dda2383fb..d884d94c6f4d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,8 @@ #include "irq-gic-common.h" -#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) +static u8 dist_prio_irq __ro_after_init = GICD_INT_DEF_PRI; +static u8 dist_prio_nmi __ro_after_init = GICD_INT_DEF_PRI & ~0x80; #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) @@ -556,7 +558,7 @@ static int gic_irq_nmi_setup(struct irq_data *d) desc->handle_irq = handle_fasteoi_nmi; } - gic_irq_set_prio(d, GICD_INT_NMI_PRI); + gic_irq_set_prio(d, dist_prio_nmi); return 0; } @@ -591,7 +593,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d) desc->handle_irq = handle_fasteoi_irq; } - gic_irq_set_prio(d, GICD_INT_DEF_PRI); + gic_irq_set_prio(d, dist_prio_irq); } static bool gic_arm64_erratum_2941627_needed(struct irq_data *d) @@ -753,7 +755,7 @@ static bool gic_rpr_is_nmi_prio(void) if (!gic_supports_nmi()) return false; - return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); + return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(dist_prio_nmi)); } static bool gic_irqnr_is_special(u32 irqnr) @@ -937,10 +939,11 @@ static void __init gic_dist_init(void) writel_relaxed(0, base + GICD_ICFGRnE + i / 4); for (i = 0; i < GIC_ESPI_NR; i += 4) - writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i); + writel_relaxed(REPEAT_BYTE_U32(dist_prio_irq), + base + GICD_IPRIORITYRnE + i); /* Now do the common stuff */ - gic_dist_config(base, GIC_LINE_NR); + gic_dist_config(base, GIC_LINE_NR, dist_prio_irq); val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1; if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) { @@ -1282,7 +1285,7 @@ static void gic_cpu_init(void) for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32) writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); - gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR); + gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, dist_prio_irq); gic_redist_wait_for_rwp(); /* initialise system registers */ @@ -2066,7 +2069,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_cpu_pm_init(); if (gic_dist_supports_lpis()) { - its_init(handle, &gic_data.rdists, gic_data.domain); + 
its_init(handle, &gic_data.rdists, gic_data.domain, dist_prio_irq); its_cpu_init(); its_lpi_memreserve_init(); } else { diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 87255bde960f..3be7bd8cd8cd 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -479,7 +479,7 @@ static void gic_dist_init(struct gic_chip_data *gic) for (i = 32; i < gic_irqs; i += 4) writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); - gic_dist_config(base, gic_irqs); + gic_dist_config(base, gic_irqs, GICD_INT_DEF_PRI); writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL); } @@ -516,7 +516,7 @@ static int gic_cpu_init(struct gic_chip_data *gic) gic_cpu_map[i] &= ~cpu_mask; } - gic_cpu_config(dist_base, 32); + gic_cpu_config(dist_base, 32, GICD_INT_DEF_PRI); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); gic_cpu_if_up(gic); @@ -608,7 +608,7 @@ void gic_dist_restore(struct gic_chip_data *gic) dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) - writel_relaxed(GICD_INT_DEF_PRI_X4, + writel_relaxed(REPEAT_BYTE_U32(GICD_INT_DEF_PRI), dist_base + GIC_DIST_PRI + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) @@ -697,7 +697,7 @@ void gic_cpu_restore(struct gic_chip_data *gic) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(32, 4); i++) - writel_relaxed(GICD_INT_DEF_PRI_X4, + writel_relaxed(REPEAT_BYTE_U32(GICD_INT_DEF_PRI), dist_base + GIC_DIST_PRI + i * 4); writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index 5285150fd909..31c3f70a5d5e 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -260,7 +260,7 @@ static void __init hip04_irq_dist_init(struct hip04_irq_data *intc) for (i = 32; i < nr_irqs; i += 2) writel_relaxed(cpumask, base + GIC_DIST_TARGET + ((i * 2) & ~3)); - gic_dist_config(base, nr_irqs); + gic_dist_config(base, nr_irqs, GICD_INT_DEF_PRI); writel_relaxed(1, base + GIC_DIST_CTRL); } @@ -287,7 +287,7 @@ static void hip04_irq_cpu_init(struct hip04_irq_data *intc) if (i != cpu) hip04_cpu_map[i] &= ~cpu_mask; - gic_cpu_config(dist_base, 32); + gic_cpu_config(dist_base, 32, GICD_INT_DEF_PRI); writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); writel_relaxed(1, base + GIC_CPU_CTRL); diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index 1177f3a1aed5..fc0246cc05ac 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -10,10 +10,6 @@ #include #define GICD_INT_DEF_PRI 0xa0 -#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ - (GICD_INT_DEF_PRI << 16) |\ - (GICD_INT_DEF_PRI << 8) |\ - GICD_INT_DEF_PRI) struct irq_domain; struct fwnode_handle; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 728691365464..70c0948f978e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -638,7 +638,7 @@ struct fwnode_handle; int __init its_lpi_memreserve_init(void); int its_cpu_init(void); int its_init(struct fwnode_handle *handle, struct rdists *rdists, - struct irq_domain *domain); + struct irq_domain *domain, u8 irq_prio); int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent); static inline bool gic_enable_sre(void) -- cgit v1.2.3 From d447bf09a4013541bbcbc7af898c26177734321e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:40 +0100 Subject: irqchip/gic-v3: Detect 
GICD_CTRL.DS and SCR_EL3.FIQ earlier In subsequent patches the GICv3 driver will choose the regular interrupt priority at boot time, dependent on the configuration of GICD_CTRL.DS and SCR_EL3.FIQ. This will need to be chosen before we configure the distributor with default priorities for all the interrupts, which happens before we currently detect these in gic_cpu_sys_reg_init(). Add a new gic_prio_init() function to detect these earlier and log them to the console so that any problems can be debugged more easily. This also allows the uniformity checks in gic_cpu_sys_reg_init() to be simplified, as we can compare directly with the boot CPU values which were recorded earlier. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3.c | 117 +++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d884d94c6f4d..e262f73b1ce8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -134,6 +134,62 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); __priority; \ }) +static u32 gic_get_pribits(void) +{ + u32 pribits; + + pribits = gic_read_ctlr(); + pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; + pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; + pribits++; + + return pribits; +} + +static bool gic_has_group0(void) +{ + u32 val; + u32 old_pmr; + + old_pmr = gic_read_pmr(); + + /* + * Let's find out if Group0 is under control of EL3 or not by + * setting the highest possible, non-zero priority in PMR. + * + * If SCR_EL3.FIQ is set, the priority gets shifted down in + * order for the CPU interface to set bit 7, and keep the + * actual priority in the non-secure range. In the process, it + * looses the least significant bit and the actual priority + * becomes 0x80. Reading it back returns 0, indicating that + * we're don't have access to Group0. + */ + gic_write_pmr(BIT(8 - gic_get_pribits())); + val = gic_read_pmr(); + + gic_write_pmr(old_pmr); + + return val != 0; +} + +static inline bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + +static bool cpus_have_security_disabled __ro_after_init; +static bool cpus_have_group0 __ro_after_init; + +static void __init gic_prio_init(void) +{ + cpus_have_security_disabled = gic_dist_security_disabled(); + cpus_have_group0 = gic_has_group0(); + + pr_info("GICD_CTRL.DS=%d, SCR_EL3.FIQ=%d\n", + cpus_have_security_disabled, + !cpus_have_group0); +} + /* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */ static refcount_t *rdist_nmi_refs; @@ -868,44 +924,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs __gic_handle_irq_from_irqson(regs); } -static u32 gic_get_pribits(void) -{ - u32 pribits; - - pribits = gic_read_ctlr(); - pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; - pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; - pribits++; - - return pribits; -} - -static bool gic_has_group0(void) -{ - u32 val; - u32 old_pmr; - - old_pmr = gic_read_pmr(); - - /* - * Let's find out if Group0 is under control of EL3 or not by - * setting the highest possible, non-zero priority in PMR.
- * - * If SCR_EL3.FIQ is set, the priority gets shifted down in - * order for the CPU interface to set bit 7, and keep the - * actual priority in the non-secure range. In the process, it - * looses the least significant bit and the actual priority - * becomes 0x80. Reading it back returns 0, indicating that - * we're don't have access to Group0. - */ - gic_write_pmr(BIT(8 - gic_get_pribits())); - val = gic_read_pmr(); - - gic_write_pmr(old_pmr); - - return val != 0; -} - static void __init gic_dist_init(void) { unsigned int i; @@ -1122,12 +1140,6 @@ static void gic_update_rdist_properties(void) gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : ""); } -/* Check whether it's single security state view */ -static inline bool gic_dist_security_disabled(void) -{ - return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; -} - static void gic_cpu_sys_reg_init(void) { int i, cpu = smp_processor_id(); @@ -1155,18 +1167,14 @@ static void gic_cpu_sys_reg_init(void) write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); } else if (gic_supports_nmi()) { /* - * Mismatch configuration with boot CPU, the system is likely - * to die as interrupt masking will not work properly on all - * CPUs + * Check that all CPUs use the same priority space. * - * The boot CPU calls this function before enabling NMI support, - * and as a result we'll never see this warning in the boot path - * for that CPU. + * If there's a mismatch with the boot CPU, the system is + * likely to die as interrupt masking will not work properly on + * all CPUs. */ - if (static_branch_unlikely(&gic_nonsecure_priorities)) - WARN_ON(!group0 || gic_dist_security_disabled()); - else - WARN_ON(group0 && !gic_dist_security_disabled()); + WARN_ON(group0 != cpus_have_group0); + WARN_ON(gic_dist_security_disabled() != cpus_have_security_disabled); } /* @@ -2062,6 +2070,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_update_rdist_properties(); + gic_prio_init(); gic_dist_init(); gic_cpu_init(); gic_enable_nmi_support(); -- cgit v1.2.3 From 18fdb6348c480fb646799f621204f674815f05e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:41 +0100 Subject: arm64: irqchip/gic-v3: Select priorities at boot time The distributor and PMR/RPR can present different views of the interrupt priority space dependent upon the values of GICD_CTLR.DS and SCR_EL3.FIQ. Currently we treat the distributor's view of the priority space as canonical, and when the two differ we change the way we handle values in the PMR/RPR, using the `gic_nonsecure_priorities` static key to decide what to do. This approach works, but it's sub-optimal. When using pseudo-NMI we manipulate the distributor rarely, and we manipulate the PMR/RPR registers very frequently in code spread out throughout the kernel (e.g. local_irq_{save,restore}()). It would be nicer if we could use fixed values for the PMR/RPR, and dynamically choose the values programmed into the distributor. This patch changes the GICv3 driver and arm64 code accordingly. PMR values are chosen at compile time, and the GICv3 driver determines the appropriate values to program into the distributor at boot time. This removes the need for the `gic_nonsecure_priorities` static key and results in smaller and better generated code for saving/restoring the irqflags. 
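For context on why PMR writes appear in the irqflags fast path at all: with pseudo-NMIs, masking regular IRQs means raising the priority threshold in ICC_PMR_EL1 rather than setting PSR.I, and an interrupt is delivered only while its priority value is numerically lower than PMR. A standalone conceptual sketch — using the priority values this series settles on, not the kernel's implementation:

    /* Conceptual model of PMR-based masking: lower value == higher priority. */
    #include <stdio.h>

    #define GIC_PRIO_IRQON  0xe0    /* IRQs and pseudo-NMIs deliverable */
    #define GIC_PRIO_IRQOFF 0xc0    /* IRQs masked; NMIs (0x80) still deliverable */

    static unsigned int pmr;        /* stand-in for ICC_PMR_EL1 */

    static void write_pmr(unsigned int val) { pmr = val; }
    static int deliverable(unsigned int prio) { return prio < pmr; }

    int main(void)
    {
            write_pmr(GIC_PRIO_IRQOFF);     /* what local_irq_disable() does */
            printf("IRQ: %d NMI: %d\n",
                   deliverable(0xc0), deliverable(0x80));      /* 0 1 */

            write_pmr(GIC_PRIO_IRQON);      /* what local_irq_enable() does */
            printf("IRQ: %d NMI: %d\n",
                   deliverable(0xc0), deliverable(0x80));      /* 1 1 */
            return 0;
    }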
Before this patch, local_irq_disable() compiles to: | 0000000000000000 <local_irq_disable>: | 0: d503201f nop | 4: d50343df msr daifset, #0x3 | 8: d65f03c0 ret | c: d503201f nop | 10: d2800c00 mov x0, #0x60 // #96 | 14: d5184600 msr icc_pmr_el1, x0 | 18: d65f03c0 ret | 1c: d2801400 mov x0, #0xa0 // #160 | 20: 17fffffd b 14 After this patch, local_irq_disable() compiles to: | 0000000000000000 <local_irq_disable>: | 0: d503201f nop | 4: d50343df msr daifset, #0x3 | 8: d65f03c0 ret | c: d2801800 mov x0, #0xc0 // #192 | 10: d5184600 msr icc_pmr_el1, x0 | 14: d65f03c0 ret ... with 3 fewer instructions per call. For defconfig + CONFIG_ARM64_PSEUDO_NMI=y, this results in a minor saving of ~4K of text, and will make it easier to make further improvements to the way we manipulate irqflags and DAIF bits. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- arch/arm64/include/asm/arch_gicv3.h | 15 ------ arch/arm64/include/asm/ptrace.h | 35 +++--------- arch/arm64/kernel/image-vars.h | 5 -- drivers/irqchip/irq-gic-v3.c | 96 ++++++++++++++------------------- include/linux/irqchip/arm-gic-v3-prio.h | 52 ++++++++++++++++++ 5 files changed, 97 insertions(+), 106 deletions(-) create mode 100644 include/linux/irqchip/arm-gic-v3-prio.h diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 5f172611654b..9e96f024b2f1 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -175,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | - GIC_PRIO_PSR_I_SET)); - BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); - /* - * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared - * and non-secure PMR accesses are not subject to the shifts that - * are applied to IRQ priorities - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); - /* - * Same situation as above, but now we make sure that we can mask - * regular interrupts. - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | - GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 47ec58031f11..0abe975d68a8 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,35 +21,12 @@ #define INIT_PSTATE_EL2 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) -/* - * PMR values used to mask/unmask interrupts. - * - * GIC priority masking works as follows: if an IRQ's priority is a higher value - * than the value held in PMR, that IRQ is masked. Lowering the value of PMR - * means masking more IRQs (or at least that the same IRQs remain masked). - * - * To mask interrupts, we clear the most significant bit of PMR. - * - * Some code sections either automatically switch back to PSR.I or explicitly - * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the priority mask, it indicates that PSR.I should be set and - * interrupt disabling temporarily does not rely on IRQ priorities.
- */ -#define GIC_PRIO_IRQON 0xe0 -#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) -#define __GIC_PRIO_IRQOFF_NS 0xa0 -#define GIC_PRIO_PSR_I_SET (1 << 4) - -#define GIC_PRIO_IRQOFF \ - ({ \ - extern struct static_key_false gic_nonsecure_priorities;\ - u8 __prio = __GIC_PRIO_IRQOFF; \ - \ - if (static_branch_unlikely(&gic_nonsecure_priorities)) \ - __prio = __GIC_PRIO_IRQOFF_NS; \ - \ - __prio; \ - }) +#include + +#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED +#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ + +#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index ba4f8f7d6a91..8f5422ed1b75 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,11 +105,6 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); -#ifdef CONFIG_ARM64_PSEUDO_NMI -/* Static key checked in GIC_PRIO_IRQOFF. */ -KVM_NVHE_ALIAS(gic_nonsecure_priorities); -#endif - /* EL2 exception handling */ KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index e262f73b1ce8..1b2b5f89f832 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -37,8 +38,8 @@ #include "irq-gic-common.h" -static u8 dist_prio_irq __ro_after_init = GICD_INT_DEF_PRI; -static u8 dist_prio_nmi __ro_after_init = GICD_INT_DEF_PRI & ~0x80; +static u8 dist_prio_irq __ro_after_init = GICV3_PRIO_IRQ; +static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) @@ -110,30 +111,6 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); -DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); -EXPORT_SYMBOL(gic_nonsecure_priorities); - -/* - * When the Non-secure world has access to group 0 interrupts (as a - * consequence of SCR_EL3.FIQ == 0), reading the ICC_RPR_EL1 register will - * return the Distributor's view of the interrupt priority. - * - * When GIC security is enabled (GICD_CTLR.DS == 0), the interrupt priority - * written by software is moved to the Non-secure range by the Distributor. - * - * If both are true (which is when gic_nonsecure_priorities gets enabled), - * we need to shift down the priority programmed by software to match it - * against the value returned by ICC_RPR_EL1. - */ -#define GICD_INT_RPR_PRI(priority) \ - ({ \ - u32 __priority = (priority); \ - if (static_branch_unlikely(&gic_nonsecure_priorities)) \ - __priority = 0x80 | (__priority >> 1); \ - \ - __priority; \ - }) - static u32 gic_get_pribits(void) { u32 pribits; @@ -185,6 +162,41 @@ static void __init gic_prio_init(void) cpus_have_security_disabled = gic_dist_security_disabled(); cpus_have_group0 = gic_has_group0(); + /* + * How priority values are used by the GIC depends on two things: + * the security state of the GIC (controlled by the GICD_CTRL.DS bit) + * and if Group 0 interrupts can be delivered to Linux in the non-secure + * world as FIQs (controlled by the SCR_EL3.FIQ bit). 
These affect the + * way priorities are presented in ICC_PMR_EL1 and in the distributor: + * + * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Distributor + * ------------------------------------------------------- + * 1 | - | unchanged | unchanged + * ------------------------------------------------------- + * 0 | 1 | non-secure | non-secure + * ------------------------------------------------------- + * 0 | 0 | unchanged | non-secure + * + * In the non-secure view reads and writes are modified: + * + * - A value written is right-shifted by one and the MSB is set, + * forcing the priority into the non-secure range. + * + * - A value read is left-shifted by one. + * + * In the first two cases, where ICC_PMR_EL1 and the interrupt priority + * are both either modified or unchanged, we can use the same set of + * priorities. + * + * In the last case, where only the interrupt priorities are modified to + * be in the non-secure range, we program the non-secure values into + * the distributor to match the PMR values we want. + */ + if (cpus_have_group0 & !cpus_have_security_disabled) { + dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq); + dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi); + } + pr_info("GICD_CTRL.DS=%d, SCR_EL3.FIQ=%d\n", cpus_have_security_disabled, !cpus_have_group0); @@ -811,7 +823,7 @@ static bool gic_rpr_is_nmi_prio(void) if (!gic_supports_nmi()) return false; - return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(dist_prio_nmi)); + return unlikely(gic_read_rpr() == GICV3_PRIO_NMI); } static bool gic_irqnr_is_special(u32 irqnr) @@ -1960,36 +1972,6 @@ static void gic_enable_nmi_support(void) pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", gic_has_relaxed_pmr_sync() ? "relaxed" : "forced"); - /* - * How priority values are used by the GIC depends on two things: - * the security state of the GIC (controlled by the GICD_CTRL.DS bit) - * and if Group 0 interrupts can be delivered to Linux in the non-secure - * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the - * ICC_PMR_EL1 register and the priority that software assigns to - * interrupts: - * - * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority - * ----------------------------------------------------------- - * 1 | - | unchanged | unchanged - * ----------------------------------------------------------- - * 0 | 1 | non-secure | non-secure - * ----------------------------------------------------------- - * 0 | 0 | unchanged | non-secure - * - * where non-secure means that the value is right-shifted by one and the - * MSB bit set, to make it fit in the non-secure priority range. - * - * In the first two cases, where ICC_PMR_EL1 and the interrupt priority - * are both either modified or unchanged, we can use the same set of - * priorities. - * - * In the last case, where only the interrupt priorities are modified to - * be in the non-secure range, we use a different PMR value to mask IRQs - * and the rest of the values that we use remain unchanged. 
- */ - if (gic_has_group0() && !gic_dist_security_disabled()) - static_branch_enable(&gic_nonsecure_priorities); - static_branch_enable(&supports_pseudo_nmis); if (static_branch_likely(&supports_deactivate_key)) diff --git a/include/linux/irqchip/arm-gic-v3-prio.h b/include/linux/irqchip/arm-gic-v3-prio.h new file mode 100644 index 000000000000..44157c9abb78 --- /dev/null +++ b/include/linux/irqchip/arm-gic-v3-prio.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H +#define __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H + +/* + * GIC priorities from the view of the PMR/RPR. + * + * These values are chosen to be valid in either the absolute priority space or + * the NS view of the priority space. The value programmed into the distributor + * and ITS will be chosen at boot time such that these values appear in the + * PMR/RPR. + * + * GICV3_PRIO_UNMASKED is the PMR view of the priority to use to permit both + * IRQs and pseudo-NMIs. + * + * GICV3_PRIO_IRQ is the PMR view of the priority of regular interrupts. This + * can be written to the PMR to mask regular IRQs. + * + * GICV3_PRIO_NMI is the PMR view of the priority of pseudo-NMIs. This can be + * written to the PMR to mask pseudo-NMIs. + * + * On arm64 some code sections either automatically switch back to PSR.I or + * explicitly require to not use priority masking. If bit GICV3_PRIO_PSR_I_SET + * is included in the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. + */ +#define GICV3_PRIO_UNMASKED 0xe0 +#define GICV3_PRIO_IRQ 0xc0 +#define GICV3_PRIO_NMI 0x80 + +#define GICV3_PRIO_PSR_I_SET (1 << 4) + +#ifndef __ASSEMBLER__ + +#define __gicv3_prio_to_ns(p) (0xff & ((p) << 1)) +#define __gicv3_ns_to_prio(ns) (0x80 | ((ns) >> 1)) + +#define __gicv3_prio_valid_ns(p) \ + (__gicv3_ns_to_prio(__gicv3_prio_to_ns(p)) == (p)) + +static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_NMI)); +static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_IRQ)); + +static_assert(GICV3_PRIO_NMI < GICV3_PRIO_IRQ); +static_assert(GICV3_PRIO_IRQ < GICV3_PRIO_UNMASKED); + +static_assert(GICV3_PRIO_IRQ < (GICV3_PRIO_IRQ | GICV3_PRIO_PSR_I_SET)); + +#endif /* __ASSEMBLER */ + +#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H */ -- cgit v1.2.3 From bfe3f0df3e3c0afdbe147d2479fe9aee4d0683e9 Mon Sep 17 00:00:00 2001 From: Youwan Wang Date: Mon, 24 Jun 2024 10:31:01 +0800 Subject: ACPI / amba: Drop unnecessary check for registered amba_dummy_clk amba_register_dummy_clk() is called only once from acpi_amba_init() and acpi_amba_init() itself is called once during the initialisation. amba_dummy_clk can't be initialised before this in any other code path and hence the check for already registered amba_dummy_clk is not necessary. Drop the same. 
Signed-off-by: Youwan Wang Acked-by: Sudeep Holla Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20240624023101.369633-1-youwan@nfschina.com Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/amba.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/arm64/amba.c b/drivers/acpi/arm64/amba.c index e1f0bbb8f393..1350083bce5f 100644 --- a/drivers/acpi/arm64/amba.c +++ b/drivers/acpi/arm64/amba.c @@ -27,11 +27,7 @@ static const struct acpi_device_id amba_id_list[] = { static void amba_register_dummy_clk(void) { - static struct clk *amba_dummy_clk; - - /* If clock already registered */ - if (amba_dummy_clk) - return; + struct clk *amba_dummy_clk; amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0); clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL); -- cgit v1.2.3 From cf938f91784f5b35d16fa9fc746a3bb03659ab50 Mon Sep 17 00:00:00 2001 From: Seongsu Park Date: Thu, 23 May 2024 21:21:46 +0900 Subject: arm64: Cleanup __cpu_set_tcr_t0sz() The T0SZ field of TCR_EL1 occupies bits 0-5 of the register and encodes the virtual address space translated by TTBR0_EL1. When updating the field, for example because we are switching to/from the idmap page-table, __cpu_set_tcr_t0sz() erroneously treats its 't0sz' argument as unshifted, resulting in harmless but confusing double shifts by 0 in the code. Co-developed-by: Leem ChaeHoon Signed-off-by: Leem ChaeHoon Signed-off-by: Seongsu Park Link: https://lore.kernel.org/r/20240523122146.144483-1-sgsu.park@samsung.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c768d16b81a4..bd19f4c758b7 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -72,11 +72,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz) + if ((tcr & TCR_T0SZ_MASK) == t0sz) return; tcr &= ~TCR_T0SZ_MASK; - tcr |= t0sz << TCR_T0SZ_OFFSET; + tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); } -- cgit v1.2.3 From c1385c1f0ba3b80bd12f26c440612175088c664c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:28 +0100 Subject: ACPI: processor: Simplify initial onlining to use same path for cold and hotplug Separate code paths, combined with a flag set in acpi_processor.c to indicate a struct acpi_processor was for a hotplugged CPU, ensured that per CPU data was only set up the first time that a CPU was initialized. This appears to be unnecessary, as the paths can be combined by letting the online logic also handle any CPUs online at the time of driver load. Motivation for this change, beyond simplification, is that ARM64 virtual CPU HP uses the same code paths for hotplug and cold boot in acpi_processor.c, so there was no easy way to set the flag for hotplug only. Removing this necessity will enable ARM64 vCPU HP to reuse the existing code paths. Acked-by: Rafael J.
Wysocki Reviewed-by: Hanjun Guo Tested-by: Miguel Luis Reviewed-by: Gavin Shan Reviewed-by: Miguel Luis Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-2-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 7 +++---- drivers/acpi/processor_driver.c | 43 ++++++++++++----------------------------- include/acpi/processor.h | 2 +- 3 files changed, 16 insertions(+), 36 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 7a0dd35d62c9..b2f0b6c19482 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -211,12 +211,11 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) } /* - * CPU got hot-added, but cpu_data is not initialized yet. Set a flag - * to delay cpu_idle/throttling initialization and do it when the CPU - * gets online for the first time. + * CPU got hot-added, but cpu_data is not initialized yet. Do + * cpu_idle/throttling initialization when the CPU gets online for + * the first time. */ pr_info("CPU%d has been hot-added\n", pr->id); - pr->flags.need_hotplug_init = 1; out: cpus_write_unlock(); diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 67db60eda370..cb52dd000b95 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -33,7 +33,6 @@ MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); MODULE_LICENSE("GPL"); -static int acpi_processor_start(struct device *dev); static int acpi_processor_stop(struct device *dev); static const struct acpi_device_id processor_device_ids[] = { @@ -47,7 +46,6 @@ static struct device_driver acpi_processor_driver = { .name = "processor", .bus = &cpu_subsys, .acpi_match_table = processor_device_ids, - .probe = acpi_processor_start, .remove = acpi_processor_stop, }; @@ -115,12 +113,9 @@ static int acpi_soft_cpu_online(unsigned int cpu) * CPU got physically hotplugged and onlined for the first time: * Initialize missing things. 
*/ - if (pr->flags.need_hotplug_init) { + if (!pr->flags.previously_online) { int ret; - pr_info("Will online and init hotplugged CPU: %d\n", - pr->id); - pr->flags.need_hotplug_init = 0; ret = __acpi_processor_start(device); WARN(ret, "Failed to start CPU: %d\n", pr->id); } else { @@ -167,9 +162,6 @@ static int __acpi_processor_start(struct acpi_device *device) if (!pr) return -ENODEV; - if (pr->flags.need_hotplug_init) - return 0; - result = acpi_cppc_processor_probe(pr); if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS)) dev_dbg(&device->dev, "CPPC data invalid or not present\n"); @@ -185,32 +177,21 @@ static int __acpi_processor_start(struct acpi_device *device) status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify, device); - if (ACPI_SUCCESS(status)) - return 0; + if (!ACPI_SUCCESS(status)) { + result = -ENODEV; + goto err_thermal_exit; + } + pr->flags.previously_online = 1; - result = -ENODEV; - acpi_processor_thermal_exit(pr, device); + return 0; +err_thermal_exit: + acpi_processor_thermal_exit(pr, device); err_power_exit: acpi_processor_power_exit(pr); return result; } -static int acpi_processor_start(struct device *dev) -{ - struct acpi_device *device = ACPI_COMPANION(dev); - int ret; - - if (!device) - return -ENODEV; - - /* Protect against concurrent CPU hotplug operations */ - cpu_hotplug_disable(); - ret = __acpi_processor_start(device); - cpu_hotplug_enable(); - return ret; -} - static int acpi_processor_stop(struct device *dev) { struct acpi_device *device = ACPI_COMPANION(dev); @@ -279,9 +260,9 @@ static int __init acpi_processor_driver_init(void) if (result < 0) return result; - result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "acpi/cpu-drv:online", - acpi_soft_cpu_online, NULL); + result = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "acpi/cpu-drv:online", + acpi_soft_cpu_online, NULL); if (result < 0) goto err; hp_online = result; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3f34ebb27525..e6f6074eadbf 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -217,7 +217,7 @@ struct acpi_processor_flags { u8 has_lpi:1; u8 power_setup_done:1; u8 bm_rld_set:1; - u8 need_hotplug_init:1; + u8 previously_online:1; }; struct acpi_processor { -- cgit v1.2.3 From d830ef3ac56941089f49ec13c80ea4aaa3c2e6c0 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:29 +0100 Subject: cpu: Do not warn on arch_register_cpu() returning -EPROBE_DEFER For arm64 the CPU registration cannot complete until the ACPI interpreter is up and running, so in those cases the arch specific arch_register_cpu() will return -EPROBE_DEFER at this stage and the registration will be attempted later. Suggested-by: Rafael J. Wysocki Acked-by: Rafael J.
Wysocki Reviewed-by: Hanjun Guo Reviewed-by: Gavin Shan Tested-by: Miguel Luis Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-3-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/base/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index c61ecb0c2ae2..1487cd9761a4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -558,7 +558,7 @@ static void __init cpu_dev_register_generic(void) for_each_present_cpu(i) { ret = arch_register_cpu(i); - if (ret) + if (ret && ret != -EPROBE_DEFER) pr_warn("register_cpu %d failed (%d)\n", i, ret); } } -- cgit v1.2.3 From 157080f03c7abb7df58c4cb519e18c188cfb7404 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:30 +0100 Subject: ACPI: processor: Drop duplicated check on _STA (enabled + present) The ACPI bus scan will only result in acpi_processor_add() being called if _STA has already been checked and the result is that the processor is enabled and present. Hence drop this additional check. Suggested-by: Rafael J. Wysocki Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Reviewed-by: Gavin Shan Tested-by: Miguel Luis Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-4-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index b2f0b6c19482..161c95c9d60a 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -186,17 +186,11 @@ static void __init acpi_pcc_cpufreq_init(void) {} #ifdef CONFIG_ACPI_HOTPLUG_CPU static int acpi_processor_hotadd_init(struct acpi_processor *pr) { - unsigned long long sta; - acpi_status status; int ret; if (invalid_phys_cpuid(pr->phys_id)) return -ENODEV; - status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); - if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT)) - return -ENODEV; - cpu_maps_update_begin(); cpus_write_lock(); -- cgit v1.2.3 From fadf231f0a06a6748a7fc4a2c29ac9ef7bca6bfd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:31 +0100 Subject: ACPI: processor: Return an error if acpi_processor_get_info() fails in processor_add() Rafael observed [1] that returning 0 from processor_add() will result in acpi_default_enumeration() being called which will attempt to create a platform device, but that makes little sense when the processor is known to be not available. So just return the error code from acpi_processor_get_info() instead. Link: https://lore.kernel.org/all/CAJZ5v0iKU8ra9jR+EmgxbuNm=Uwx2m1-8vn_RAZ+aCiUVLe3Pw@mail.gmail.com/ [1] Suggested-by: Rafael J. Wysocki Acked-by: Rafael J. 
Wysocki Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-5-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 161c95c9d60a..5f062806ca40 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -393,7 +393,7 @@ static int acpi_processor_add(struct acpi_device *device, result = acpi_processor_get_info(device); if (result) /* Processor is not physically present or unavailable */ - return 0; + return result; BUG_ON(pr->id >= nr_cpu_ids); -- cgit v1.2.3 From 47ec9b417ed9b6b8ec2a941cd84d9de62adc358a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:32 +0100 Subject: ACPI: processor: Fix memory leaks in error paths of processor_add() If acpi_processor_get_info() returned an error, pr and the associated pr->throttling.shared_cpu_map were leaked. The unwind code was in the wrong order with respect to the setup, relying on some unwind actions having no effect (clearing variables that were never set etc.). That makes it harder to reason about, so reorder and add appropriate labels to only undo what was actually set up in the first place. Acked-by: Rafael J. Wysocki Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-6-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 5f062806ca40..16e36e55a560 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -393,7 +393,7 @@ static int acpi_processor_add(struct acpi_device *device, result = acpi_processor_get_info(device); if (result) /* Processor is not physically present or unavailable */ - return result; + goto err_clear_driver_data; BUG_ON(pr->id >= nr_cpu_ids); @@ -408,7 +408,7 @@ static int acpi_processor_add(struct acpi_device *device, "BIOS reported wrong ACPI id %d for the processor\n", pr->id); /* Give up, but do not abort the namespace scan.
*/ - goto err; + goto err_clear_driver_data; } /* * processor_device_array is not cleared on errors to allow buggy BIOS @@ -420,12 +420,12 @@ static int acpi_processor_add(struct acpi_device *device, dev = get_cpu_device(pr->id); if (!dev) { result = -ENODEV; - goto err; + goto err_clear_per_cpu; } result = acpi_bind_one(dev, device); if (result) - goto err; + goto err_clear_per_cpu; pr->dev = dev; @@ -436,10 +436,11 @@ static int acpi_processor_add(struct acpi_device *device, dev_err(dev, "Processor driver could not be attached\n"); acpi_unbind_one(dev); - err: - free_cpumask_var(pr->throttling.shared_cpu_map); - device->driver_data = NULL; + err_clear_per_cpu: per_cpu(processors, pr->id) = NULL; + err_clear_driver_data: + device->driver_data = NULL; + free_cpumask_var(pr->throttling.shared_cpu_map); err_free_pr: kfree(pr); return result; -- cgit v1.2.3 From cd9239660b8c0357a7338a33d40a429e48273e77 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:33 +0100 Subject: ACPI: processor: Move checks and availability of acpi_processor earlier Make the per_cpu(processors, cpu) entries available earlier so that they are available in arch_register_cpu() as ARM64 will need access to the acpi_handle to distinguish between acpi_processor_add() and earlier registration attempts (which will fail as _STA cannot be checked). Reorder the remove flow to clear this per_cpu() after arch_unregister_cpu() has completed, allowing it to be used in there as well. Note that on x86 for the CPU hotplug case, the pr->id prior to acpi_map_cpu() may be invalid. Thus the per_cpu() structures must be initialized after that call or after checking the ID is valid (not hotplug path). Acked-by: Rafael J. Wysocki Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-7-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 88 +++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 16e36e55a560..0bc0419adf6b 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -183,8 +183,38 @@ static void __init acpi_pcc_cpufreq_init(void) {} #endif /* CONFIG_X86 */ /* Initialization */ +static DEFINE_PER_CPU(void *, processor_device_array); + +static int acpi_processor_set_per_cpu(struct acpi_processor *pr, + struct acpi_device *device) +{ + BUG_ON(pr->id >= nr_cpu_ids); + + /* + * Buggy BIOS check. + * ACPI id of processors can be reported wrongly by the BIOS. + * Don't trust it blindly + */ + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { + dev_warn(&device->dev, + "BIOS reported wrong ACPI id %d for the processor\n", + pr->id); + return -EINVAL; + } + /* + * processor_device_array is not cleared on errors to allow buggy BIOS + * checks. 
+ */ + per_cpu(processor_device_array, pr->id) = device; + per_cpu(processors, pr->id) = pr; + + return 0; +} + #ifdef CONFIG_ACPI_HOTPLUG_CPU -static int acpi_processor_hotadd_init(struct acpi_processor *pr) +static int acpi_processor_hotadd_init(struct acpi_processor *pr, + struct acpi_device *device) { int ret; @@ -198,8 +228,16 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) if (ret) goto out; + ret = acpi_processor_set_per_cpu(pr, device); + if (ret) { + acpi_unmap_cpu(pr->id); + goto out; + } + ret = arch_register_cpu(pr->id); if (ret) { + /* Leave the processor device array in place to detect buggy bios */ + per_cpu(processors, pr->id) = NULL; acpi_unmap_cpu(pr->id); goto out; } @@ -217,7 +255,8 @@ out: return ret; } #else -static inline int acpi_processor_hotadd_init(struct acpi_processor *pr) +static inline int acpi_processor_hotadd_init(struct acpi_processor *pr, + struct acpi_device *device) { return -ENODEV; } @@ -232,6 +271,7 @@ static int acpi_processor_get_info(struct acpi_device *device) acpi_status status = AE_OK; static int cpu0_initialized; unsigned long long value; + int ret; acpi_processor_errata(); @@ -315,12 +355,12 @@ static int acpi_processor_get_info(struct acpi_device *device) * NOTE: Even if the processor has a cpuid, it may not be present * because cpuid <-> apicid mapping is persistent now. */ - if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) { - int ret = acpi_processor_hotadd_init(pr); - - if (ret) - return ret; - } + if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) + ret = acpi_processor_hotadd_init(pr, device); + else + ret = acpi_processor_set_per_cpu(pr, device); + if (ret) + return ret; /* * On some boxes several processors use the same processor bus id. @@ -365,8 +405,6 @@ static int acpi_processor_get_info(struct acpi_device *device) * (cpu_data(cpu)) values, like CPU feature flags, family, model, etc. * Such things have to be put in and set up by the processor driver's .probe(). */ -static DEFINE_PER_CPU(void *, processor_device_array); - static int acpi_processor_add(struct acpi_device *device, const struct acpi_device_id *id) { @@ -395,28 +433,6 @@ static int acpi_processor_add(struct acpi_device *device, if (result) /* Processor is not physically present or unavailable */ goto err_clear_driver_data; - BUG_ON(pr->id >= nr_cpu_ids); - - /* - * Buggy BIOS check. - * ACPI id of processors can be reported wrongly by the BIOS. - * Don't trust it blindly - */ - if (per_cpu(processor_device_array, pr->id) != NULL && - per_cpu(processor_device_array, pr->id) != device) { - dev_warn(&device->dev, - "BIOS reported wrong ACPI id %d for the processor\n", - pr->id); - /* Give up, but do not abort the namespace scan. */ - goto err_clear_driver_data; - } - /* - * processor_device_array is not cleared on errors to allow buggy BIOS - * checks. - */ - per_cpu(processor_device_array, pr->id) = device; - per_cpu(processors, pr->id) = pr; - dev = get_cpu_device(pr->id); if (!dev) { result = -ENODEV; @@ -470,10 +486,6 @@ static void acpi_processor_remove(struct acpi_device *device) device_release_driver(pr->dev); acpi_unbind_one(pr->dev); - /* Clean up. */ - per_cpu(processor_device_array, pr->id) = NULL; - per_cpu(processors, pr->id) = NULL; - cpu_maps_update_begin(); cpus_write_lock(); @@ -481,6 +493,10 @@ static void acpi_processor_remove(struct acpi_device *device) arch_unregister_cpu(pr->id); acpi_unmap_cpu(pr->id); + /* Clean up. 
*/ + per_cpu(processor_device_array, pr->id) = NULL; + per_cpu(processors, pr->id) = NULL; + cpus_write_unlock(); cpu_maps_update_done(); -- cgit v1.2.3 From 36b921637e900c77f5f9bd5b45db522124068a96 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:34 +0100 Subject: ACPI: processor: Add acpi_get_processor_handle() helper If CONFIG_ACPI_PROCESSOR is enabled, provide a helper to retrieve the acpi_handle for a given CPU, allowing access to methods in the DSDT. Tested-by: Miguel Luis Reviewed-by: Gavin Shan Acked-by: Rafael J. Wysocki Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-8-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 11 +++++++++++ include/linux/acpi.h | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 0bc0419adf6b..56592462d2fb 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -35,6 +35,17 @@ EXPORT_PER_CPU_SYMBOL(processors); struct acpi_processor_errata errata __read_mostly; EXPORT_SYMBOL_GPL(errata); +acpi_handle acpi_get_processor_handle(int cpu) +{ + struct acpi_processor *pr; + + pr = per_cpu(processors, cpu); + if (pr) + return pr->handle; + + return NULL; +} + static int acpi_processor_errata_piix4(struct pci_dev *dev) { u8 value1 = 0; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..fd45bfab66b8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,6 +304,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +acpi_handle acpi_get_processor_handle(int cpu); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif @@ -1076,6 +1078,11 @@ static inline bool acpi_sleep_state_supported(u8 sleep_state) return false; } +static inline acpi_handle acpi_get_processor_handle(int cpu) +{ + return NULL; +} + #endif /* !CONFIG_ACPI */ extern void arch_post_acpi_subsys_init(void); -- cgit v1.2.3 From b398a91decd9d4b399389a12675ea7716039b964 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:35 +0100 Subject: ACPI: processor: Register deferred CPUs from acpi_processor_get_info() The arm64 specific arch_register_cpu() call may defer CPU registration until the ACPI interpreter is available and the _STA method can be evaluated. If this occurs, then a second attempt is made in acpi_processor_get_info(). Note that the arm64 specific call has not yet been added so for now this will be called for the original hotplug case. For architectures that do not defer until the ACPI Processor driver loads (e.g. x86), for initially present CPUs there will already be a CPU device. If present, do not try to register again. Systems can still be booted with 'acpi=off', or not include an ACPI description at all, as in these cases arch_register_cpu() will not have deferred registration when first called. This moves the CPU register logic back to a subsys_initcall(), while the memory nodes will have been registered earlier. Note this is where the call was prior to the cleanup series, so there should be no side effects of moving it back again for this specific case. [PATCH 00/21] Initial cleanups for vCPU HP.
https://lore.kernel.org/all/ZVyz%2FVe5pPu8AWoA@shell.armlinux.org.uk/ commit 5b95f94c3b9f ("x86/topology: Switch over to GENERIC_CPU_DEVICES") Signed-off-by: James Morse Signed-off-by: Russell King (Oracle) Reviewed-by: Gavin Shan Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Reviewed-by: Hanjun Guo Acked-by: Rafael J. Wysocki Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-9-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 56592462d2fb..f0b7a789f0a1 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -359,14 +359,14 @@ static int acpi_processor_get_info(struct acpi_device *device) } /* - * Extra Processor objects may be enumerated on MP systems with - * less than the max # of CPUs. They should be ignored _iff - * they are physically not present. - * - * NOTE: Even if the processor has a cpuid, it may not be present - * because cpuid <-> apicid mapping is persistent now. + * This code is not called unless we know the CPU is present and + * enabled. The two paths are: + * a) Initially present CPUs on architectures that do not defer + * their arch_register_cpu() calls until this point. + * b) Hotplugged CPUs (enabled bit in _STA has transitioned from not + * enabled to enabled) */ - if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) + if (!get_cpu_device(pr->id)) ret = acpi_processor_hotadd_init(pr, device); else ret = acpi_processor_set_per_cpu(pr, device); -- cgit v1.2.3 From 1859a671bdb9b3df114c3fdf85b1032a9a8d208d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:36 +0100 Subject: ACPI: scan: switch to flags for acpi_scan_check_and_detach() Precursor patch adds the ability to pass a uintptr_t of flags into acpi_scan_check_and_detach() so that additional flags can be added to indicate whether to defer portions of the eject flow. The new flag follows in the next patch. Acked-by: Rafael J.
Wysocki Reviewed-by: Hanjun Guo Reviewed-by: Gavin Shan Tested-by: Miguel Luis Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-10-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/scan.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 503773707e01..4d649ce5876f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -243,13 +243,16 @@ static int acpi_scan_try_to_offline(struct acpi_device *device) return 0; } -static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) +#define ACPI_SCAN_CHECK_FLAG_STATUS BIT(0) + +static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p) { struct acpi_scan_handler *handler = adev->handler; + uintptr_t flags = (uintptr_t)p; - acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, check); + acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p); - if (check) { + if (flags & ACPI_SCAN_CHECK_FLAG_STATUS) { acpi_bus_get_status(adev); /* * Skip devices that are still there and take the enabled @@ -287,7 +290,9 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) static void acpi_scan_check_subtree(struct acpi_device *adev) { - acpi_scan_check_and_detach(adev, (void *)true); + uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS; + + acpi_scan_check_and_detach(adev, (void *)flags); } static int acpi_scan_hot_remove(struct acpi_device *device) @@ -2596,7 +2601,9 @@ EXPORT_SYMBOL(acpi_bus_scan); */ void acpi_bus_trim(struct acpi_device *adev) { - acpi_scan_check_and_detach(adev, NULL); + uintptr_t flags = 0; + + acpi_scan_check_and_detach(adev, (void *)flags); } EXPORT_SYMBOL_GPL(acpi_bus_trim); -- cgit v1.2.3 From 3b9d0a78aeda194994892f7c42f6ca5feb45eadd Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:37 +0100 Subject: ACPI: Add post_eject to struct acpi_scan_handler for cpu hotplug struct acpi_scan_handler has a detach callback that is used to remove a driver when a bus is changed. When interacting with an eject-request, the detach callback is called before _EJ0. This means the ACPI processor driver can't use _STA to determine if a CPU has been made not-present, or some of the other _STA bits have been changed. acpi_processor_remove() needs to know the value of _STA after _EJ0 has been called. Add a post_eject callback to struct acpi_scan_handler. This is called after acpi_scan_hot_remove() has successfully called _EJ0. Because acpi_scan_check_and_detach() also clears the handler pointer, it needs to be told if the caller will go on to call acpi_bus_post_eject(), so that acpi_device_clear_enumerated() and clearing the handler pointer can be deferred. An extra flag is added to the flags field introduced in the previous patch to achieve this. Signed-off-by: James Morse Reviewed-by: Gavin Shan Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Acked-by: Rafael J.
Wysocki Reviewed-by: Hanjun Guo Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-11-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/acpi/acpi_processor.c | 4 ++-- drivers/acpi/scan.c | 30 +++++++++++++++++++++++++++--- include/acpi/acpi_bus.h | 1 + 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index f0b7a789f0a1..64e10bad8072 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -475,7 +475,7 @@ static int acpi_processor_add(struct acpi_device *device, #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Removal */ -static void acpi_processor_remove(struct acpi_device *device) +static void acpi_processor_post_eject(struct acpi_device *device) { struct acpi_processor *pr; @@ -643,7 +643,7 @@ static struct acpi_scan_handler processor_handler = { .ids = processor_device_ids, .attach = acpi_processor_add, #ifdef CONFIG_ACPI_HOTPLUG_CPU - .detach = acpi_processor_remove, + .post_eject = acpi_processor_post_eject, #endif .hotplug = { .enabled = true, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 4d649ce5876f..0aa20623525a 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -244,6 +244,7 @@ static int acpi_scan_try_to_offline(struct acpi_device *device) } #define ACPI_SCAN_CHECK_FLAG_STATUS BIT(0) +#define ACPI_SCAN_CHECK_FLAG_EJECT BIT(1) static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p) { @@ -272,8 +273,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p) if (handler) { if (handler->detach) handler->detach(adev); - - adev->handler = NULL; } else { device_release_driver(&adev->dev); } @@ -283,6 +282,28 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p) */ acpi_device_set_power(adev, ACPI_STATE_D3_COLD); adev->flags.initialized = false; + + /* For eject this is deferred to acpi_bus_post_eject() */ + if (!(flags & ACPI_SCAN_CHECK_FLAG_EJECT)) { + adev->handler = NULL; + acpi_device_clear_enumerated(adev); + } + return 0; +} + +static int acpi_bus_post_eject(struct acpi_device *adev, void *not_used) +{ + struct acpi_scan_handler *handler = adev->handler; + + acpi_dev_for_each_child_reverse(adev, acpi_bus_post_eject, NULL); + + if (handler) { + if (handler->post_eject) + handler->post_eject(adev); + + adev->handler = NULL; + } + acpi_device_clear_enumerated(adev); return 0; @@ -300,6 +321,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle handle = device->handle; unsigned long long sta; acpi_status status; + uintptr_t flags = ACPI_SCAN_CHECK_FLAG_EJECT; if (device->handler && device->handler->hotplug.demand_offline) { if (!acpi_scan_is_offline(device, true)) @@ -312,7 +334,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle_debug(handle, "Ejecting\n"); - acpi_bus_trim(device); + acpi_scan_check_and_detach(device, (void *)flags); acpi_evaluate_lck(handle, 0); /* @@ -335,6 +357,8 @@ static int acpi_scan_hot_remove(struct acpi_device *device) } else if (sta & ACPI_STA_DEVICE_ENABLED) { acpi_handle_warn(handle, "Eject incomplete - status 0x%llx\n", sta); + } else { + acpi_bus_post_eject(device, NULL); } return 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..05ca49478537 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -134,6 +134,7 @@ struct acpi_scan_handler { bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct 
acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); + void (*post_eject)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; -- cgit v1.2.3 From 8d34b6f17b9ac93faa2791eb037dcb08bdf755de Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:38 +0100 Subject: arm64: acpi: Move get_cpu_for_acpi_id() to a header ACPI identifies CPUs by UID. get_cpu_for_acpi_id() maps the ACPI UID to the Linux CPU number. The helper to retrieve this mapping is only available in arm64's NUMA code. Move it to live next to get_acpi_id_for_cpu(). Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Signed-off-by: Russell King (Oracle) Acked-by: Hanjun Guo Signed-off-by: Jonathan Cameron Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20240529133446.28446-12-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 11 +++++++++++ arch/arm64/kernel/acpi_numa.c | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2a..bc9a6656fc0c 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,17 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index e51535a5f939..0c036a9a3c33 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu) return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { -- cgit v1.2.3 From 2488444274c70038eb6b686cba5f1ce48ebb9cdd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:39 +0100 Subject: arm64: acpi: Harden get_cpu_for_acpi_id() against missing CPU entry In a review discussion of the changes to support vCPU hotplug, where a check was added on the GICC being enabled if it was online, it was noted that there is a need to map back to the CPU and use that to index into a cpumask. As such, a valid ID is needed. If an MPIDR check fails in acpi_map_gic_cpu_interface() it is possible for the cpu_madt_gicc[cpu] entry to be NULL. This function would then cause a NULL pointer dereference. Whilst a path to trigger this has not been established, harden this caller against the possibility.
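To illustrate the failure mode, here is a minimal userspace sketch (not the kernel code; the table and names below are illustrative stand-ins for the real per-CPU MADT GICC cache):

#define NR_CPUS 8

struct madt_gicc { unsigned int uid; };

/* Sparse table: an entry may be NULL if the MPIDR check failed at parse time. */
static struct madt_gicc *cpu_madt_gicc[NR_CPUS];

/* Unhardened lookup: dereferences a possibly-NULL entry (never called here). */
static int get_cpu_for_acpi_id_unsafe(unsigned int uid)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (uid == cpu_madt_gicc[cpu]->uid)	/* NULL dereference on a missing entry */
			return cpu;
	return -1;
}

/* Hardened lookup, mirroring the patch: skip missing entries first. */
static int get_cpu_for_acpi_id_safe(unsigned int uid)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_madt_gicc[cpu] && uid == cpu_madt_gicc[cpu]->uid)
			return cpu;
	return -1;
}

int main(void)
{
	static struct madt_gicc gicc0 = { .uid = 42 };

	cpu_madt_gicc[0] = &gicc0;	/* entries 1..7 stay NULL, as after a failed MPIDR check */
	(void)get_cpu_for_acpi_id_unsafe;	/* would crash if asked for a uid past CPU 0 */
	return get_cpu_for_acpi_id_safe(42) == 0 ? 0 : 1;
}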
Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-13-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index bc9a6656fc0c..a407f9cd549e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -124,7 +124,8 @@ static inline int get_cpu_for_acpi_id(u32 uid) int cpu; for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) return cpu; return -EINVAL; -- cgit v1.2.3 From fa2dabe57220e6af78ed7a2f7016bf250a618204 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:40 +0100 Subject: irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc() gic_acpi_match_gicc() is only called via gic_acpi_count_gicr_regions(). It should only count the number of enabled redistributors, but it also tries to sanity check the GICC entry, currently returning an error if the Enabled bit is set, but the gicr_base_address is zero. Adding support for the online-capable bit to the sanity check will complicate it, for no benefit. The existing check implicitly depends on gic_acpi_count_gicr_regions() previously failing to find any GICR regions (as it is valid to have a gicr_base_address of zero if the redistributors are described via a GICR entry). Instead of complicating the check, remove it. Failures that happen at this point cause the irqchip not to register, meaning no irqs can be requested. The kernel grinds to a panic() pretty quickly. Without the check, MADT tables that exhibit this problem are still caught by gic_populate_rdist(), which helpfully also prints what went wrong: | CPU4: mpidr 100 has no re-distributor! Signed-off-by: James Morse Reviewed-by: Gavin Shan Tested-by: Miguel Luis Signed-off-by: Russell King (Oracle) Reviewed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240529133446.28446-14-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- drivers/irqchip/irq-gic-v3.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 6fb276504bcc..10af15f93d4d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2415,19 +2415,10 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) { + if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) acpi_data.enabled_rdists++; - return 0; - } - /* - * It's perfectly valid firmware can pass disabled GICC entry, driver - * should not treat as errors, skip the entry instead of probe fail. - */ - if (!acpi_gicc_is_usable(gicc)) - return 0; - - return -ENODEV; + return 0; } static int __init gic_acpi_count_gicr_regions(void) -- cgit v1.2.3 From d633da5d3ab1a0eb26a2213d65da1e189e82f8ab Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:41 +0100 Subject: irqchip/gic-v3: Add support for ACPI's disabled but 'online capable' CPUs To support virtual CPU hotplug, ACPI has added an 'online capable' bit to the MADT GICC entries.
This indicates a disabled CPU entry may not be possible to bring online via PSCI until firmware has set the enabled bit in _STA. This means that a "usable" GIC redistributor is one that is marked as either enabled, or online capable. The meaning of acpi_gicc_is_usable() would become less clear than just checking the pair of flags at call sites. As such, drop that helper function. The test in gic_acpi_match_gicc() remains as testing just the enabled bit, so the count of enabled redistributors is correct. What about the redistributor in the GICC entry? ACPI doesn't want to say. Assume the worst: When a redistributor is described in the GICC entry, but the entry is marked as disabled at boot, assume the redistributor is inaccessible. The GICv3 driver doesn't support late online of redistributors, so this means the corresponding CPU can't be brought online either. Rather than modifying cpu masks that may already have been used, register a new cpuhp callback to fail this case. This must run earlier than the main gic_starting_cpu() so that this case can be rejected before the section of cpuhp that runs on the CPU that is coming up, as that is not allowed to fail. This solution keeps the handling of this broken firmware corner case local to the GIC driver. As the precise ordering of this callback doesn't need to be controlled as long as it is in that initial prepare phase, use CPUHP_BP_PREPARE_DYN. Systems that want CPU hotplug in a VM can ensure their redistributors are always-on, and describe them that way with a GICR entry in the MADT. Suggested-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Russell King (Oracle) Tested-by: Miguel Luis Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240529133446.28446-15-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 3 ++- drivers/acpi/processor_core.c | 3 ++- drivers/irqchip/irq-gic-v3.c | 46 +++++++++++++++++++++++++++++++++++++++---- include/linux/acpi.h | 5 ----- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 31c8b3094dd7..6b0572daa9da 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -530,7 +530,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (!acpi_gicc_is_usable(processor)) { + if (!(processor->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) { pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index b203cfe28550..b04b684f3190 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -90,7 +90,8 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (!acpi_gicc_is_usable(gicc)) + if (!(gicc->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) return -ENODEV; /* device_declaration means Device object in DSDT, in the diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 10af15f93d4d..cc81515c1413 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -44,6 +44,8 @@ #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) +static struct cpumask broken_rdists __read_mostly; + struct redist_region { void __iomem *redist_base; phys_addr_t phys_base; @@ -1293,6 +1295,18
@@ static void gic_cpu_init(void) #define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) #define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) +/* + * gic_starting_cpu() is called after the last point where cpuhp is allowed + * to fail. So pre check for problems earlier. + */ +static int gic_check_rdist(unsigned int cpu) +{ + if (cpumask_test_cpu(cpu, &broken_rdists)) + return -EINVAL; + + return 0; +} + static int gic_starting_cpu(unsigned int cpu) { gic_cpu_init(); @@ -1384,6 +1398,10 @@ static void __init gic_smp_init(void) }; int base_sgi; + cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, + "irqchip/arm/gicv3:checkrdist", + gic_check_rdist, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, "irqchip/arm/gicv3:starting", gic_starting_cpu, NULL); @@ -2365,8 +2383,24 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; void __iomem *redist_base; - if (!acpi_gicc_is_usable(gicc)) + /* Neither enabled or online capable means it doesn't exist, skip it */ + if (!(gicc->flags & (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) + return 0; + + /* + * Capable but disabled CPUs can be brought online later. What about + * the redistributor? ACPI doesn't want to say! + * Virtual hotplug systems can use the MADT's "always-on" GICR entries. + * Otherwise, prevent such CPUs from being brought online. + */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) { + int cpu = get_cpu_for_acpi_id(gicc->uid); + + pr_warn("CPU %u's redistributor is inaccessible: this CPU can't be brought online\n", cpu); + if (cpu >= 0) + cpumask_set_cpu(cpu, &broken_rdists); return 0; + } redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) @@ -2413,9 +2447,12 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, /* * If GICC is enabled and has valid gicr base address, then it means - * GICR base is presented via GICC + * GICR base is presented via GICC. The redistributor is only known to + * be accessible if the GICC is marked as enabled. If this bit is not + * set, we'd need to add the redistributor at runtime, which isn't + * supported. */ - if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) + if (gicc->flags & ACPI_MADT_ENABLED && gicc->gicr_base_address) acpi_data.enabled_rdists++; return 0; @@ -2474,7 +2511,8 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea int maint_irq_mode; static int first_madt = true; - if (!acpi_gicc_is_usable(gicc)) + if (!(gicc->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) return 0; maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd45bfab66b8..9f8c9d29b035 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -237,11 +237,6 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) -{ - return gicc->flags & ACPI_MADT_ENABLED; -} - #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else -- cgit v1.2.3 From 643e12da4a4983a34658e33f2b1581caee9cac8c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 29 May 2024 14:34:42 +0100 Subject: arm64: psci: Ignore DENIED CPUs When a CPU is marked as disabled, but online capable in the MADT, PSCI applies some firmware policy to control when it can be brought online. PSCI returns DENIED to a CPU_ON request if this is not currently permitted. The OS can learn the current policy from the _STA enabled bit. Handle the PSCI DENIED return code gracefully instead of printing an error. Note the alternatives to the PSCI cpu_boot() callback do not return -EPERM, so the change in smp.c has no effect. See https://developer.arm.com/documentation/den0022/f/?lang=en page 58. Signed-off-by: Jean-Philippe Brucker [ morse: Rewrote commit message ] Signed-off-by: James Morse Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Reviewed-by: Gavin Shan Signed-off-by: Russell King (Oracle) Signed-off-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-16-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/psci.c | 2 +- arch/arm64/kernel/smp.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 29a8e444db83..fabd732d0a2d 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); - if (err) + if (err && err != -EPERM) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6b0572daa9da..a755b9f902fa 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -132,7 +132,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); if (ret) { - pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + if (ret != -EPERM) + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } -- cgit v1.2.3 From eba4675008a6e4cbff27b579a837bd29be2642de Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:43 +0100 Subject: arm64: arch_register_cpu() variant to check if an ACPI handle is now available. The ARM64 architecture does not support physical CPU HP today. To avoid any possibility of a bug against such an architecture if one is defined in the future, check for the physical CPU HP case (not present) and return an error on any such attempt. On ARM64, virtual CPU hotplug relies on the status value that can be queried via the AML method _STA for the CPU object. There are two conditions in which the CPU can be registered. 1) ACPI disabled. 2) ACPI enabled and the acpi_handle is available.
_STA evaluates to the CPU being both enabled and present. (Note that in the absence of the _STA method they are always in this state.) If neither of these conditions is met, the CPU is not 'yet' ready to be used and -EPROBE_DEFER is returned. Success occurs in the early attempt to register the CPUs if we are booting with DT (no concept yet of vCPU HP); if not, it succeeds for already enabled CPUs when the ACPI Processor driver attaches to them. Finally, it may succeed via the CPU hotplug code, indicating that the CPU is now enabled. For ACPI, if CONFIG_ACPI_PROCESSOR is enabled, the only path to get to arch_register_cpu() with that handle set is via acpi_processor_hotadd_init(), which is only called from an ACPI bus scan in which _STA has already been queried, so there is no need to repeat it here. Add a comment to remind us of this in the future. Suggested-by: Rafael J. Wysocki Tested-by: Miguel Luis Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-17-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a755b9f902fa..1c84375f978e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -511,6 +511,59 @@ static int __init smp_cpu_setup(int cpu) static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; +int arch_register_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (!acpi_disabled && !acpi_handle && + IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU)) + return -EPROBE_DEFER; + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + /* For now block anything that looks like physical CPU Hotplug */ + if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return -ENODEV; + } +#endif + + /* + * Availability of the acpi handle is sufficient to establish + * that _STA has already been checked. No need to recheck here. + */ + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); + + return register_cpu(c, cpu); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +void arch_unregister_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + acpi_status status; + unsigned long long sta; + + if (!acpi_handle) { + pr_err_once("Removing a CPU without associated ACPI handle\n"); + return; + } + + status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return; + + /* For now do not allow anything that looks like physical CPU HP */ + if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return; + } + + unregister_cpu(c); +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + #ifdef CONFIG_ACPI static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; -- cgit v1.2.3 From 9d0873892f4d4bf62a351897e91e5169b4c6f4aa Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:44 +0100 Subject: arm64: Kconfig: Enable hotplug CPU on arm64 if ACPI_PROCESSOR is enabled. In order to move arch_register_cpu() to be called via the same path for initially present CPUs described by ACPI and hotplugged CPUs, ACPI_HOTPLUG_CPU needs to be enabled.
The protection against invalid IDs in acpi_map_cpu() is needed as at least one production BIOS is in the wild which reports entries in DSDT (with no _STA method, so assumed enabled and present) that don't match MADT. Tested-by: Miguel Luis Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-18-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/acpi.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d91259ee7b5..e8f2ef2312db 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -5,6 +5,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if (ACPI && PCI) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e0e7b93c16cc..9360ba86678b 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -423,6 +424,27 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size) memblock_mark_nomap(addr, size); } +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id, + int *pcpu) +{ + /* If an error code is passed in this stub can't fix it */ + if (*pcpu < 0) { + pr_warn_once("Unable to map CPU to valid ID\n"); + return *pcpu; + } + + return 0; +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + return 0; +} +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + #ifdef CONFIG_ACPI_FFH /* * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as -- cgit v1.2.3 From 828ce929d1c3aff0e9a330fea40d29980e17e256 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:45 +0100 Subject: arm64: document virtual CPU hotplug's expectations Add a description of physical and virtual CPU hotplug, explain the differences and elaborate on what is required in ACPI for a working virtual hotplug system. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Signed-off-by: Russell King (Oracle) Tested-by: Miguel Luis Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-19-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/cpu-hotplug.rst | 79 ++++++++++++++++++++++++++++++++ Documentation/arch/arm64/index.rst | 1 + 2 files changed, 80 insertions(+) create mode 100644 Documentation/arch/arm64/cpu-hotplug.rst diff --git a/Documentation/arch/arm64/cpu-hotplug.rst b/Documentation/arch/arm64/cpu-hotplug.rst new file mode 100644 index 000000000000..76ba8d932c72 --- /dev/null +++ b/Documentation/arch/arm64/cpu-hotplug.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _cpuhp_index: + +==================== +CPU Hotplug and ACPI +==================== + +CPU hotplug in the arm64 world is commonly used to describe the kernel taking +CPUs online/offline using PSCI. This document is about ACPI firmware allowing +CPUs that were not available during boot to be added to the system later. + +``possible`` and ``present`` refer to the state of the CPU as seen by Linux.
+ + +CPU Hotplug on physical systems - CPUs not present at boot +---------------------------------------------------------- + +Physical systems need to mark a CPU that is ``possible`` but not ``present`` as +being ``present``. An example would be a dual socket machine, where the package +in one of the sockets can be replaced while the system is running. + +This is not supported. + +In the arm64 world CPUs are not a single device but a slice of the system. +There are no systems that support the physical addition (or removal) of CPUs +while the system is running, and ACPI is not able to sufficiently describe +them. + +e.g. New CPUs come with new caches, but the platform's cache topology is +described in a static table, the PPTT. How caches are shared between CPUs is +not discoverable, and must be described by firmware. + +e.g. The GIC redistributor for each CPU must be accessed by the driver during +boot to discover the system wide supported features. ACPI's MADT GICC +structures can describe a redistributor associated with a disabled CPU, but +can't describe whether the redistributor is accessible, only that it is not +'always on'. + +arm64's ACPI tables assume that everything described is ``present``. + + +CPU Hotplug on virtual systems - CPUs not enabled at boot +--------------------------------------------------------- + +Virtual systems have the advantage that all the properties the system will +ever have can be described at boot. There are no power-domain considerations +as such devices are emulated. + +CPU Hotplug on virtual systems is supported. It is distinct from physical +CPU Hotplug as all resources are described as ``present``, but CPUs may be +marked as disabled by firmware. Only the CPU's online/offline behaviour is +influenced by firmware. An example is where a virtual machine boots with a +single CPU, and additional CPUs are added once a cloud orchestrator deploys +the workload. + +For a virtual machine, the VMM (e.g. Qemu) plays the part of firmware. + +Virtual hotplug is implemented as a firmware policy affecting which CPUs can be +brought online. Firmware can enforce its policy via PSCI's return codes. e.g. +``DENIED``. + +The ACPI tables must describe all the resources of the virtual machine. CPUs +that firmware wishes to disable either from boot (or later) should not be +``enabled`` in the MADT GICC structures, but should have the ``online capable`` +bit set, to indicate they can be enabled later. The boot CPU must be marked as +``enabled``. The 'always on' GICR structure must be used to describe the +redistributors. + +CPUs described as ``online capable`` but not ``enabled`` can be set to enabled +by the DSDT's Processor object's _STA method. On virtual systems the _STA method +must always report the CPU as ``present``. Changes to the firmware policy can +be notified to the OS via device-check or eject-request. + +CPUs described as ``enabled`` in the static table should not have their _STA +modified dynamically by firmware. Soft-restart features such as kexec will +re-read the static properties of the system from these static tables, and +may malfunction if these no longer describe the running system. Linux will +re-discover the dynamic properties of the system from the _STA method later +during boot.
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst index d08e924204bf..78544de0a8a9 100644 --- a/Documentation/arch/arm64/index.rst +++ b/Documentation/arch/arm64/index.rst @@ -13,6 +13,7 @@ ARM64 Architecture asymmetric-32bit booting cpu-feature-registers + cpu-hotplug elf_hwcaps hugetlbpage kdump -- cgit v1.2.3 From 4e1a7df4548003fc081360b0f4edce3f7a991bfc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:46 +0100 Subject: cpumask: Add enabled cpumask for present CPUs that can be brought online The 'offline' file in sysfs shows all offline CPUs, including those that aren't present. User-space is expected to remove not-present CPUs from this list to learn which CPUs could be brought online. CPUs can be present but not-enabled. These CPUs can't be brought online until the firmware policy changes, which comes with an ACPI notification that will register the CPUs. With only the offline and present files, user-space is unable to determine which CPUs it can try to bring online. Add a new CPU mask that shows this based on all the registered CPUs. Signed-off-by: James Morse Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Acked-by: Thomas Gleixner Signed-off-by: Russell King (Oracle) Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-20-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- Documentation/ABI/testing/sysfs-devices-system-cpu | 6 ++++++ drivers/base/cpu.c | 10 +++++++++ include/linux/cpumask.h | 25 ++++++++++++++++++++++ kernel/cpu.c | 3 +++ 4 files changed, 44 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index e7e160954e79..53ed1a803422 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -694,3 +694,9 @@ Description: (RO) indicates whether or not the kernel directly supports modifying the crash elfcorehdr for CPU hot un/plug and/or on/offline changes. + +What: /sys/devices/system/cpu/enabled +Date: Nov 2022 +Contact: Linux kernel mailing list +Description: + (RO) the list of CPUs that can be brought online. 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 1487cd9761a4..b57326fd48d4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -95,6 +95,7 @@ void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->dev.id; + set_cpu_enabled(logical_cpu, false); unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); device_unregister(&cpu->dev); @@ -273,6 +274,13 @@ static ssize_t print_cpus_offline(struct device *dev, } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); +static ssize_t print_cpus_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask)); +} +static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL); + static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { @@ -413,6 +421,7 @@ int register_cpu(struct cpu *cpu, int num) register_cpu_under_node(num, cpu_to_node(num)); dev_pm_qos_expose_latency_limit(&cpu->dev, PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); + set_cpu_enabled(num, true); return 0; } @@ -494,6 +503,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, + &dev_attr_enabled.attr, &dev_attr_isolated.attr, #ifdef CONFIG_NO_HZ_FULL &dev_attr_nohz_full.attr, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..954d4adc8f81 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -93,6 +93,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -125,11 +126,13 @@ static inline void set_nr_cpu_ids(unsigned int nr) extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; +extern struct cpumask __cpu_enabled_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) +#define cpu_enabled_mask ((const struct cpumask *)&__cpu_enabled_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) @@ -1075,6 +1078,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) #endif @@ -1092,6 +1096,15 @@ set_cpu_possible(unsigned int cpu, bool possible) cpumask_clear_cpu(cpu, &__cpu_possible_mask); } +static inline void +set_cpu_enabled(unsigned int cpu, bool can_be_onlined) +{ + if (can_be_onlined) + cpumask_set_cpu(cpu, &__cpu_enabled_mask); + else + cpumask_clear_cpu(cpu, &__cpu_enabled_mask); +} + static inline void set_cpu_present(unsigned int cpu, bool present) { @@ -1173,6 +1186,7 @@ static __always_inline unsigned int num_online_cpus(void) return raw_atomic_read(&__num_online_cpus); } #define 
num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_enabled_cpus() cpumask_weight(cpu_enabled_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) @@ -1181,6 +1195,11 @@ static inline bool cpu_online(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_online_mask); } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_enabled_mask); +} + static inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); @@ -1205,6 +1224,7 @@ static inline bool cpu_dying(unsigned int cpu) #define num_online_cpus() 1U #define num_possible_cpus() 1U +#define num_enabled_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U @@ -1218,6 +1238,11 @@ static inline bool cpu_possible(unsigned int cpu) return cpu == 0; } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpu == 0; +} + static inline bool cpu_present(unsigned int cpu) { return cpu == 0; diff --git a/kernel/cpu.c b/kernel/cpu.c index 563877d6c28b..a2f3ad276814 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3069,6 +3069,9 @@ EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); +struct cpumask __cpu_enabled_mask __read_mostly; +EXPORT_SYMBOL(__cpu_enabled_mask); + struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); -- cgit v1.2.3 From 4a112585ebe8cb89edf1cae5ba41940c3b9ae307 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Mon, 17 Jun 2024 17:50:55 -0700 Subject: perf/arm-cmn: Decouple wp_config registers from filter group number Previously, wp_config0/2 registers were used for primary match group and wp_config1/3 registers for secondary match group. In order to support tertiary match group, this patch decouples the registers and the groups. Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20240618005056.3092866-2-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 97 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index e26ad1d3ed0b..f2349d23df57 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -590,6 +590,13 @@ struct arm_cmn_hw_event { s8 dtc_idx[CMN_MAX_DTCS]; u8 num_dns; u8 dtm_offset; + + /* + * WP config registers are divided to UP and DOWN events. We need to + * keep to track only one of them. 
+ */ + DECLARE_BITMAP(wp_idx, CMN_MAX_XPS); + bool wide_sel; enum cmn_filter_select filter_sel; }; @@ -617,6 +624,17 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) return (x[pos / 32] >> ((pos % 32) * 2)) & 3; } +static void arm_cmn_set_wp_idx(unsigned long *wp_idx, unsigned int pos, bool val) +{ + if (val) + set_bit(pos, wp_idx); +} + +static unsigned int arm_cmn_get_wp_idx(unsigned long *wp_idx, unsigned int pos) +{ + return test_bit(pos, wp_idx); +} + struct arm_cmn_event_attr { struct device_attribute attr; enum cmn_model model; @@ -1336,9 +1354,34 @@ static const struct attribute_group *arm_cmn_attr_groups[] = { NULL }; -static int arm_cmn_wp_idx(struct perf_event *event) +static int arm_cmn_find_free_wp_idx(struct arm_cmn_dtm *dtm, + struct perf_event *event) +{ + int wp_idx = CMN_EVENT_EVENTID(event); + + if (dtm->wp_event[wp_idx] >= 0) + if (dtm->wp_event[++wp_idx] >= 0) + return -ENOSPC; + + return wp_idx; +} + +static int arm_cmn_get_assigned_wp_idx(struct perf_event *event, + struct arm_cmn_hw_event *hw, + unsigned int pos) { - return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event); + return CMN_EVENT_EVENTID(event) + arm_cmn_get_wp_idx(hw->wp_idx, pos); +} + +static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm, + struct perf_event *event, + unsigned int dtc, int wp_idx, + unsigned int pos) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + + dtm->wp_event[wp_idx] = hw->dtc_idx[dtc]; + arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event)); } static u32 arm_cmn_wp_config(struct perf_event *event) @@ -1520,12 +1563,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags) writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); cmn->dtc[i].cc_active = true; } else if (type == CMN_TYPE_WP) { - int wp_idx = arm_cmn_wp_idx(event); u64 val = CMN_EVENT_WP_VAL(event); u64 mask = CMN_EVENT_WP_MASK(event); for_each_hw_dn(hw, dn, i) { void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx)); writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx)); @@ -1550,10 +1593,9 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) i = hw->dtc_idx[0]; cmn->dtc[i].cc_active = false; } else if (type == CMN_TYPE_WP) { - int wp_idx = arm_cmn_wp_idx(event); - for_each_hw_dn(hw, dn, i) { void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx)); writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx)); @@ -1571,10 +1613,23 @@ struct arm_cmn_val { u8 dtm_count[CMN_MAX_DTMS]; u8 occupid[CMN_MAX_DTMS][SEL_MAX]; u8 wp[CMN_MAX_DTMS][4]; + u8 wp_combine[CMN_MAX_DTMS][2]; int dtc_count[CMN_MAX_DTCS]; bool cycles; }; +static int arm_cmn_val_find_free_wp_config(struct perf_event *event, + struct arm_cmn_val *val, int dtm) +{ + int wp_idx = CMN_EVENT_EVENTID(event); + + if (val->wp[dtm][wp_idx]) + if (val->wp[dtm][++wp_idx]) + return -ENOSPC; + + return wp_idx; +} + static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, struct perf_event *event) { @@ -1606,8 +1661,9 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, if (type != CMN_TYPE_WP) continue; - wp_idx = arm_cmn_wp_idx(event); - val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; + wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm); + val->wp[dtm][wp_idx] = 1; + 
val->wp_combine[dtm][wp_idx >> 1] += !!CMN_EVENT_WP_COMBINE(event); } } @@ -1631,6 +1687,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) return -ENOMEM; arm_cmn_val_add_event(cmn, val, leader); + for_each_sibling_event(sibling, leader) arm_cmn_val_add_event(cmn, val, sibling); @@ -1645,7 +1702,7 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) goto done; for_each_hw_dn(hw, dn, i) { - int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel; + int wp_idx, dtm = dn->dtm, sel = hw->filter_sel; if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS) goto done; @@ -1657,12 +1714,12 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) if (type != CMN_TYPE_WP) continue; - wp_idx = arm_cmn_wp_idx(event); - if (val->wp[dtm][wp_idx]) + wp_idx = arm_cmn_val_find_free_wp_config(event, val, dtm); + if (wp_idx < 0) goto done; - wp_cmb = val->wp[dtm][wp_idx ^ 1]; - if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) + if (wp_idx & 1 && + val->wp_combine[dtm][wp_idx >> 1] != !!CMN_EVENT_WP_COMBINE(event)) goto done; } @@ -1773,8 +1830,11 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset; unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); - if (type == CMN_TYPE_WP) - dtm->wp_event[arm_cmn_wp_idx(event)] = -1; + if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_get_assigned_wp_idx(event, hw, i); + + dtm->wp_event[wp_idx] = -1; + } if (hw->filter_sel > SEL_NONE) hw->dn[i].occupid[hw->filter_sel].count--; @@ -1783,6 +1843,7 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG); } memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); + memset(hw->wp_idx, 0, sizeof(hw->wp_idx)); for_each_hw_dtc_idx(hw, j, idx) cmn->dtc[j].counters[idx] = NULL; @@ -1836,10 +1897,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) if (type == CMN_TYPE_XP) { input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; } else if (type == CMN_TYPE_WP) { - int tmp, wp_idx = arm_cmn_wp_idx(event); + int tmp, wp_idx; u32 cfg = arm_cmn_wp_config(event); - if (dtm->wp_event[wp_idx] >= 0) + wp_idx = arm_cmn_find_free_wp_idx(dtm, event); + if (wp_idx < 0) goto free_dtms; tmp = dtm->wp_event[wp_idx ^ 1]; @@ -1848,7 +1910,8 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) goto free_dtms; input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; - dtm->wp_event[wp_idx] = hw->dtc_idx[d]; + + arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i); writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); -- cgit v1.2.3 From f9a7a91f64064216572d0914e452704bbcfa3130 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Mon, 17 Jun 2024 17:50:56 -0700 Subject: perf/arm-cmn: Enable support for tertiary match group Add support for tertiary match group. 
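For illustration, here is a minimal userspace sketch of the new group decoding, assuming the two-bit group field at config bits [57:56] as defined above (the helper and constants are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* Watchpoint group field: was a single bit at 56, widened to bits [57:56]. */
#define WP_GRP_SHIFT	56
#define WP_GRP_MASK	(3ULL << WP_GRP_SHIFT)

static unsigned int wp_grp(uint64_t config, int is_cmn600)
{
	unsigned int grp = (config & WP_GRP_MASK) >> WP_GRP_SHIFT;

	/* CMN-600 supports only the primary and secondary matching groups. */
	if (is_cmn600)
		grp &= 1;
	return grp;
}

int main(void)
{
	uint64_t config = 2ULL << WP_GRP_SHIFT;	/* select the tertiary group */

	printf("grp=%u\n", wp_grp(config, 0));	/* 2 on newer IPs */
	printf("grp=%u\n", wp_grp(config, 1));	/* clamped to 0 on CMN-600 */
	return 0;
}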
Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20240618005056.3092866-3-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index f2349d23df57..c932d9d355cf 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -174,9 +174,8 @@ #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) -/* Note that we don't yet support the tertiary match group on newer IPs */ -#define CMN_CONFIG_WP_GRP BIT_ULL(56) -#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) +#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56) +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) #define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0) @@ -1384,7 +1383,7 @@ static void arm_cmn_claim_wp_idx(struct arm_cmn_dtm *dtm, arm_cmn_set_wp_idx(hw->wp_idx, pos, wp_idx - CMN_EVENT_EVENTID(event)); } -static u32 arm_cmn_wp_config(struct perf_event *event) +static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx) { u32 config; u32 dev = CMN_EVENT_WP_DEV_SEL(event); @@ -1394,6 +1393,10 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 combine = CMN_EVENT_WP_COMBINE(event); bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600; + /* CMN-600 supports only primary and secondary matching groups */ + if (is_cmn600) + grp &= 1; + config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | @@ -1401,7 +1404,9 @@ static u32 arm_cmn_wp_config(struct perf_event *event) if (exc) config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE : CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE; - if (combine && !grp) + + /* wp_combine is available only on WP0 and WP2 */ + if (combine && !(wp_idx & 0x1)) config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE : CMN_DTM_WPn_CONFIG_WP_COMBINE; return config; @@ -1898,12 +1903,14 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; } else if (type == CMN_TYPE_WP) { int tmp, wp_idx; - u32 cfg = arm_cmn_wp_config(event); + u32 cfg; wp_idx = arm_cmn_find_free_wp_idx(dtm, event); if (wp_idx < 0) goto free_dtms; + cfg = arm_cmn_wp_config(event, wp_idx); + tmp = dtm->wp_event[wp_idx ^ 1]; if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp])) -- cgit v1.2.3 From 695b1fd0851004c4616ad538217a2242e06149b1 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 28 Jun 2024 15:56:11 +0100 Subject: dt-bindings: arm: pmu: Add new Cortex and Neoverse cores Add compatible strings for the PMUs in the Arm Cortex-A725, Cortex-X925, Neoverse N3, Neoverse V2, Neoverse V3 and Neoverse V3AE cores. 
Signed-off-by: Andre Przywara Link: https://lore.kernel.org/r/20240628145612.1291329-2-andre.przywara@arm.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 99b5e9530707..528544d0a161 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -53,14 +53,20 @@ properties: - arm,cortex-a710-pmu - arm,cortex-a715-pmu - arm,cortex-a720-pmu + - arm,cortex-a725-pmu - arm,cortex-x1-pmu - arm,cortex-x2-pmu - arm,cortex-x3-pmu - arm,cortex-x4-pmu + - arm,cortex-x925-pmu - arm,neoverse-e1-pmu - arm,neoverse-n1-pmu - arm,neoverse-n2-pmu + - arm,neoverse-n3-pmu - arm,neoverse-v1-pmu + - arm,neoverse-v2-pmu + - arm,neoverse-v3-pmu + - arm,neoverse-v3ae-pmu - brcm,vulcan-pmu - cavium,thunder-pmu - nvidia,denver-pmu -- cgit v1.2.3 From c99d00ef9e73f3be162ff44cf32e30ff6a1c2e65 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 28 Jun 2024 15:56:12 +0100 Subject: perf: pmuv3: Add new Cortex and Neoverse PMUs Add support for the Arm Cortex-A725, Cortex-X925, Neoverse N3, Neoverse V2, Neoverse V3 and Neoverse V3AE. This just adds the names and connects them with their DT compatible strings. Signed-off-by: Andre Przywara Link: https://lore.kernel.org/r/20240628145612.1291329-3-andre.przywara@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 23fa6c5da82c..9f46ce0c459e 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1340,14 +1340,20 @@ PMUV3_INIT_SIMPLE(armv9_cortex_a520) PMUV3_INIT_SIMPLE(armv9_cortex_a710) PMUV3_INIT_SIMPLE(armv9_cortex_a715) PMUV3_INIT_SIMPLE(armv9_cortex_a720) +PMUV3_INIT_SIMPLE(armv9_cortex_a725) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) PMUV3_INIT_SIMPLE(armv9_cortex_x3) PMUV3_INIT_SIMPLE(armv9_cortex_x4) +PMUV3_INIT_SIMPLE(armv9_cortex_x925) PMUV3_INIT_SIMPLE(armv8_neoverse_e1) PMUV3_INIT_SIMPLE(armv8_neoverse_n1) PMUV3_INIT_SIMPLE(armv9_neoverse_n2) +PMUV3_INIT_SIMPLE(armv9_neoverse_n3) PMUV3_INIT_SIMPLE(armv8_neoverse_v1) +PMUV3_INIT_SIMPLE(armv8_neoverse_v2) +PMUV3_INIT_SIMPLE(armv8_neoverse_v3) +PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae) PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) PMUV3_INIT_SIMPLE(armv8_nvidia_denver) @@ -1379,14 +1385,20 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init}, {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, + {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init}, {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init}, + {.compatible = "arm,cortex-x925-pmu", .data = armv9_cortex_x925_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, + {.compatible = "arm,neoverse-n3-pmu", .data = armv9_neoverse_n3_pmu_init}, 
{.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init}, + {.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init}, + {.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init}, + {.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init}, {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, -- cgit v1.2.3 From f26f374824573706a3f3264d54fd17d1e161cab4 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:51 +0800 Subject: dt-bindings: perf: fsl-imx-ddr: Add i.MX95 compatible i.MX95 has a DDR pmu. This will add a compatible for it. Acked-by: Conor Dooley Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-1-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml index 6c96a4204e5d..37e8b98f2cdc 100644 --- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml +++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml @@ -30,6 +30,9 @@ properties: - items: - const: fsl,imx8dxl-ddr-pmu - const: fsl,imx8-ddr-pmu + - items: + - const: fsl,imx95-ddr-pmu + - const: fsl,imx93-ddr-pmu reg: maxItems: 1 -- cgit v1.2.3 From 4773dd10fda0e12a08a231d2fb773646817da343 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:52 +0800 Subject: perf: imx_perf: add macro definitions for parsing config attr The user can set event and counter in cmdline and the driver need to parse it using 'config' attr value. This will add macro definitions to avoid hard-code in driver. Reviewed-by: Frank Li Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-2-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx9_ddr_perf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 72c2d3074cde..c4caeab7a9be 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -42,6 +42,9 @@ #define NUM_COUNTERS 11 #define CYCLES_COUNTER 0 +#define CONFIG_EVENT_MASK GENMASK(7, 0) +#define CONFIG_COUNTER_MASK GENMASK(15, 8) + #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) #define DDR_PERF_DEV_NAME "imx9_ddr" @@ -339,8 +342,10 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, int counter, bool enable) { u32 ctrl_a; + int event; ctrl_a = readl_relaxed(pmu->base + PMLCA(counter)); + event = FIELD_GET(CONFIG_EVENT_MASK, config); if (enable) { ctrl_a |= PMLCA_FC; @@ -352,7 +357,7 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, ctrl_a &= ~PMLCA_FC; ctrl_a |= PMLCA_CE; ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F); - ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF)); + ctrl_a |= FIELD_PREP(PMLCA_EVENT, event); writel(ctrl_a, pmu->base + PMLCA(counter)); } else { /* Freeze counter. 
*/ @@ -366,8 +371,8 @@ static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int u32 pmcfg1, pmcfg2; int event, counter; - event = cfg & 0x000000FF; - counter = (cfg & 0x0000FF00) >> 8; + event = FIELD_GET(CONFIG_EVENT_MASK, cfg); + counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg); pmcfg1 = readl_relaxed(pmu->base + PMCFG1); @@ -469,7 +474,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) int cfg2 = event->attr.config2; int counter; - counter = (cfg & 0x0000FF00) >> 8; + counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg); pmu->events[counter] = event; pmu->active_events++; -- cgit v1.2.3 From 27e4a6523edd2aaf78abd72556d30629a38490b2 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:53 +0800 Subject: perf: imx_perf: let the driver manage the counter usage rather than the user In the current design, the user of the perf tool needs to input a counter ID to count events. However, this is not user-friendly since the user needs to look up the map table to find the counter. Instead of letting the user input the counter, let the driver manage the counters in this patch. This will be implemented by: 1. allocate counter 0 for the cycle event. 2. find an unused counter from 1-10 for reference events. 3. allocate the specific counter for counter-specific events. In this patch, the counter attr is kept for backward compatibility, but any value passed down via counter= will be ignored. To mark counter-specific events, the counter ID is encoded into perf_pmu_events_attr.id. Reviewed-by: Frank Li Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-3-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx9_ddr_perf.c | 168 +++++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 68 deletions(-) diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index c4caeab7a9be..08cb1bfb09de 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -41,9 +41,10 @@ #define NUM_COUNTERS 11 #define CYCLES_COUNTER 0 +#define CYCLES_EVENT_ID 0 #define CONFIG_EVENT_MASK GENMASK(7, 0) -#define CONFIG_COUNTER_MASK GENMASK(15, 8) +#define CONFIG_COUNTER_MASK GENMASK(23, 16) #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) @@ -130,6 +131,9 @@ static ssize_t ddr_pmu_event_show(struct device *dev, return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } +#define COUNTER_OFFSET_IN_EVENT 8 +#define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id) + #define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ (&((struct perf_pmu_events_attr[]) { \ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ .id = _id, } \ })[0].attr.attr) @@ -162,81 +166,81 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63), /* counter1 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, ID(1, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, ID(1, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, ID(1, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, ID(1, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, ID(1, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, ID(1, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, ID(1, 70)), + 
IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, ID(1, 71)), /* counter2 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, ID(2, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, ID(2, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, ID(2, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, ID(2, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, ID(2, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, ID(2, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* counter3 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, ID(3, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, ID(3, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, ID(3, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, ID(3, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, ID(3, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* counter4 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, ID(4, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, ID(4, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, ID(4, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, ID(4, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, ID(4, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* counter5 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64), - 
IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70), - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, ID(5, 65)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, ID(5, 66)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, ID(5, 67)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, ID(5, 68)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, ID(5, 69)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)), /* counter6 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, ID(6, 72)), /* counter7 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, ID(7, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, ID(7, 65)), /* counter8 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, ID(8, 64)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, ID(8, 65)), /* counter9 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, ID(9, 65)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, ID(9, 66)), /* counter10 specific events */ - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, ID(10, 65)), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, ID(10, 66)), NULL, }; @@ -245,7 +249,7 @@ static const struct attribute_group ddr_perf_events_attr_group = { .attrs = ddr_perf_events_attrs, }; -PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7,16-23"); PMU_FORMAT_ATTR(counter, "config:8-15"); PMU_FORMAT_ATTR(axi_id, "config1:0-17"); PMU_FORMAT_ATTR(axi_mask, "config2:0-17"); @@ -366,13 +370,10 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, } } -static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2) +static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, + int counter, int axi_id, int axi_mask) { u32 pmcfg1, pmcfg2; - int event, counter; - - event = FIELD_GET(CONFIG_EVENT_MASK, cfg); - counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg); pmcfg1 = readl_relaxed(pmu->base + PMCFG1); @@ -392,12 +393,12 @@ static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN; pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF); - pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2); + pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, axi_mask); writel(pmcfg1, pmu->base + PMCFG1); pmcfg2 = readl_relaxed(pmu->base + PMCFG2); pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF); - pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1); + pmcfg2 |= 
FIELD_PREP(PMCFG2_ID, axi_id); writel(pmcfg2, pmu->base + PMCFG2); } @@ -465,6 +466,28 @@ static void ddr_perf_event_start(struct perf_event *event, int flags) hwc->state = 0; } +static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event, int counter) +{ + int i; + + if (event == CYCLES_EVENT_ID) { + // The cycles counter is dedicated to the cycle event. + if (pmu->events[CYCLES_COUNTER] == NULL) + return CYCLES_COUNTER; + } else if (counter != 0) { + // A counter-specific event uses its specific counter. + if (pmu->events[counter] == NULL) + return counter; + } else { + // Auto-allocate a counter for a reference event. + for (i = 1; i < NUM_COUNTERS; i++) + if (pmu->events[i] == NULL) + return i; + } + + return -ENOENT; +} + static int ddr_perf_event_add(struct perf_event *event, int flags) { struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); @@ -472,10 +495,17 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) int cfg = event->attr.config; int cfg1 = event->attr.config1; int cfg2 = event->attr.config2; - int counter; + int event_id, counter; + event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg); counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg); + counter = ddr_perf_alloc_counter(pmu, event_id, counter); + if (counter < 0) { + dev_dbg(pmu->dev, "There are not enough counters\n"); + return -EOPNOTSUPP; + } + pmu->events[counter] = event; pmu->active_events++; hwc->idx = counter; @@ -485,7 +515,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) ddr_perf_event_start(event, flags); /* read trans, write trans, read beat */ - ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2); + ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); return 0; } @@ -506,9 +536,11 @@ static void ddr_perf_event_del(struct perf_event *event, int flags) { struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; ddr_perf_event_stop(event, PERF_EF_UPDATE); + pmu->events[counter] = NULL; pmu->active_events--; hwc->idx = -1; } -- cgit v1.2.3 From fab5e5a866e86054a78bedd1bd1d6707421149a5 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:54 +0800 Subject: perf: imx_perf: refactor driver for imx93 This driver initially supported the imx93 SoC and now it's time to add support for the imx95 SoC. However, some macro definitions and events differ between these two SoCs. To prepare for imx95 support, refactor the imx93 driver. 
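To make the retained encoding concrete, a hedged worked example (the macros are from this driver; the arithmetic is illustrative):

	/*
	 * ID(3, 73) marks event 73 as specific to counter 3:
	 *
	 *   ID(3, 73) = (3 << COUNTER_OFFSET_IN_EVENT) | 73 = 0x349
	 *
	 * The sysfs format "config:0-7,16-23" scatters this value so the
	 * event number (0x49) lands in config[7:0] and the counter (0x3)
	 * in config[23:16], which is exactly what the driver extracts:
	 */
	int event_id = FIELD_GET(CONFIG_EVENT_MASK, cfg);	/* 0x49 = 73 */
	int counter = FIELD_GET(CONFIG_COUNTER_MASK, cfg);	/* 0x3 */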
Reviewed-by: Frank Li Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-4-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx9_ddr_perf.c | 106 +++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 08cb1bfb09de..5433c52a9872 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -11,14 +11,14 @@ #include /* Performance monitor configuration */ -#define PMCFG1 0x00 -#define PMCFG1_RD_TRANS_FILT_EN BIT(31) -#define PMCFG1_WR_TRANS_FILT_EN BIT(30) -#define PMCFG1_RD_BT_FILT_EN BIT(29) -#define PMCFG1_ID_MASK GENMASK(17, 0) +#define PMCFG1 0x00 +#define MX93_PMCFG1_RD_TRANS_FILT_EN BIT(31) +#define MX93_PMCFG1_WR_TRANS_FILT_EN BIT(30) +#define MX93_PMCFG1_RD_BT_FILT_EN BIT(29) +#define MX93_PMCFG1_ID_MASK GENMASK(17, 0) -#define PMCFG2 0x04 -#define PMCFG2_ID GENMASK(17, 0) +#define PMCFG2 0x04 +#define MX93_PMCFG2_ID GENMASK(17, 0) /* Global control register affects all counters and takes priority over local control registers */ #define PMGC0 0x40 @@ -75,6 +75,11 @@ static const struct imx_ddr_devtype_data imx93_devtype_data = { .identifier = "imx93", }; +static inline bool is_imx93(struct ddr_pmu *pmu) +{ + return pmu->devtype_data == &imx93_devtype_data; +} + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data}, { /* sentinel */ } @@ -122,24 +127,37 @@ static const struct attribute_group ddr_perf_cpumask_attr_group = { .attrs = ddr_perf_cpumask_attrs, }; +struct imx9_pmu_events_attr { + struct device_attribute attr; + u64 id; + const void *devtype_data; +}; + static ssize_t ddr_pmu_event_show(struct device *dev, struct device_attribute *attr, char *page) { - struct perf_pmu_events_attr *pmu_attr; + struct imx9_pmu_events_attr *pmu_attr; - pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + pmu_attr = container_of(attr, struct imx9_pmu_events_attr, attr); return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } #define COUNTER_OFFSET_IN_EVENT 8 #define ID(counter, id) ((counter << COUNTER_OFFSET_IN_EVENT) | id) -#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ - (&((struct perf_pmu_events_attr[]) { \ +#define DDR_PMU_EVENT_ATTR_COMM(_name, _id, _data) \ + (&((struct imx9_pmu_events_attr[]) { \ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ - .id = _id, } \ + .id = _id, \ + .devtype_data = _data, } \ })[0].attr.attr) +#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, NULL) + +#define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data) + static struct attribute *ddr_perf_events_attrs[] = { /* counter0 cycles event */ IMX9_DDR_PMU_EVENT_ATTR(cycles, 0), @@ -185,7 +203,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, ID(2, 70)), IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/ /* counter3 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)), @@ -197,7 +215,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, ID(3, 70)), IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)), 
IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/ /* counter4 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)), @@ -209,7 +227,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, ID(4, 70)), IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)), - IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), + IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/ /* counter5 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)), @@ -244,9 +262,29 @@ static struct attribute *ddr_perf_events_attrs[] = { NULL, }; +static umode_t +ddr_perf_events_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj)); + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + struct imx9_pmu_events_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr.attr); + + if (!eattr->devtype_data) + return attr->mode; + + if (eattr->devtype_data != ddr_pmu->devtype_data) + return 0; + + return attr->mode; +} + static const struct attribute_group ddr_perf_events_attr_group = { .name = "events", .attrs = ddr_perf_events_attrs, + .is_visible = ddr_perf_events_attrs_is_visible, }; PMU_FORMAT_ATTR(event, "config:0-7,16-23"); @@ -370,36 +408,28 @@ static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, } } -static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, - int counter, int axi_id, int axi_mask) +static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, + int counter, int axi_id, int axi_mask) { u32 pmcfg1, pmcfg2; + u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN, + MX93_PMCFG1_WR_TRANS_FILT_EN, + MX93_PMCFG1_RD_BT_FILT_EN }; pmcfg1 = readl_relaxed(pmu->base + PMCFG1); - if (counter == 2 && event == 73) - pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN; - else if (counter == 2 && event != 73) - pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN; - - if (counter == 3 && event == 73) - pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN; - else if (counter == 3 && event != 73) - pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN; - - if (counter == 4 && event == 73) - pmcfg1 |= PMCFG1_RD_BT_FILT_EN; - else if (counter == 4 && event != 73) - pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN; + if (counter >= 2 && counter <= 4) + pmcfg1 = event == 73 ? 
pmcfg1 | mask[counter - 2] : + pmcfg1 & ~mask[counter - 2]; - pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF); - pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, axi_mask); - writel(pmcfg1, pmu->base + PMCFG1); + pmcfg1 &= ~FIELD_PREP(MX93_PMCFG1_ID_MASK, 0x3FFFF); + pmcfg1 |= FIELD_PREP(MX93_PMCFG1_ID_MASK, axi_mask); + writel_relaxed(pmcfg1, pmu->base + PMCFG1); pmcfg2 = readl_relaxed(pmu->base + PMCFG2); - pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF); - pmcfg2 |= FIELD_PREP(PMCFG2_ID, axi_id); - writel(pmcfg2, pmu->base + PMCFG2); + pmcfg2 &= ~FIELD_PREP(MX93_PMCFG2_ID, 0x3FFFF); + pmcfg2 |= FIELD_PREP(MX93_PMCFG2_ID, axi_id); + writel_relaxed(pmcfg2, pmu->base + PMCFG2); } static void ddr_perf_event_update(struct perf_event *event) @@ -515,7 +545,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) ddr_perf_event_start(event, flags); /* read trans, write trans, read beat */ - ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); return 0; } -- cgit v1.2.3 From ac9aa295f7a89d38656739628796f086f0b160e2 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:55 +0800 Subject: perf: imx_perf: fix counter start and config sequence In the current driver, the counter is started first and then configured. This sequence is not correct for AXI filter events since the correct AXI_MASK and AXI_ID are not set yet, so the results may be inaccurate. Reviewed-by: Frank Li Fixes: 55691f99d417 ("drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-5-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx9_ddr_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 5433c52a9872..7b43b54920da 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -541,12 +541,12 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) hwc->idx = counter; hwc->state |= PERF_HES_STOPPED; - if (flags & PERF_EF_START) - ddr_perf_event_start(event, flags); - /* read trans, write trans, read beat */ imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + if (flags & PERF_EF_START) + ddr_perf_event_start(event, flags); + return 0; } -- cgit v1.2.3 From d0d7c66c537d36fbb87c515edb695c1e8ea3cdcb Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 29 May 2024 16:03:56 +0800 Subject: perf: imx_perf: add support for i.MX95 platform i.MX95 has a DDR PMU which is almost the same as the i.MX93 one, but it now supports read beat and write beat filter capabilities. This adds support for i.MX95 and enhances the driver with the specific filter handling it requires. 
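Before the usage examples, a hedged sketch of the per-counter filter routing introduced below (the register names are from this patch; the helper itself is ours, for illustration only):

	/* Map a counter to its i.MX95 beat-filter ID register for event 73. */
	static u32 imx95_filt_cfg_offset(int counter)
	{
		switch (counter) {
		case 2: return PMCFG3;	/* write beat filter */
		case 3: return PMCFG4;	/* read beat filter 2 */
		case 4: return PMCFG5;	/* read beat filter 1 */
		case 5: return PMCFG6;	/* read beat filter 0 */
		default: return 0;	/* no dedicated filter register */
		}
	}

Each selected register then takes a 10-bit axi_id/axi_mask pair via MX95_PMCFG_ID and MX95_PMCFG_ID_MASK.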
Usage: For read beat: ~# perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt2,axi_mask=ID_MASK,axi_id=ID/ ~# perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt1,axi_mask=ID_MASK,axi_id=ID/ ~# perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt0,axi_mask=ID_MASK,axi_id=ID/ eg: For edma2: perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt0,axi_mask=0x00f,axi_id=0x00c/ For write beat: ~# perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_wr_beat_filt,axi_mask=ID_MASK,axi_id=ID/ eg: For edma2: perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_wr_beat_filt,axi_mask=0x00f,axi_id=0x00c/ Reviewed-by: Frank Li Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240529080358.703784-6-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx9_ddr_perf.c | 89 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 7b43b54920da..69f920b1caf2 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -17,9 +17,19 @@ #define MX93_PMCFG1_RD_BT_FILT_EN BIT(29) #define MX93_PMCFG1_ID_MASK GENMASK(17, 0) +#define MX95_PMCFG1_WR_BEAT_FILT_EN BIT(31) +#define MX95_PMCFG1_RD_BEAT_FILT_EN BIT(30) + #define PMCFG2 0x04 #define MX93_PMCFG2_ID GENMASK(17, 0) +#define PMCFG3 0x08 +#define PMCFG4 0x0C +#define PMCFG5 0x10 +#define PMCFG6 0x14 +#define MX95_PMCFG_ID_MASK GENMASK(9, 0) +#define MX95_PMCFG_ID GENMASK(25, 16) + /* Global control register affects all counters and takes priority over local control registers */ #define PMGC0 0x40 /* Global control register bits */ @@ -75,13 +85,23 @@ static const struct imx_ddr_devtype_data imx93_devtype_data = { .identifier = "imx93", }; +static const struct imx_ddr_devtype_data imx95_devtype_data = { + .identifier = "imx95", +}; + static inline bool is_imx93(struct ddr_pmu *pmu) { return pmu->devtype_data == &imx93_devtype_data; } +static inline bool is_imx95(struct ddr_pmu *pmu) +{ + return pmu->devtype_data == &imx95_devtype_data; +} + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { - {.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data}, + { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data }, + { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); @@ -158,6 +178,9 @@ static ssize_t ddr_pmu_event_show(struct device *dev, #define IMX93_DDR_PMU_EVENT_ATTR(_name, _id) \ DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx93_devtype_data) +#define IMX95_DDR_PMU_EVENT_ATTR(_name, _id) \ + DDR_PMU_EVENT_ATTR_COMM(_name, _id, &imx95_devtype_data) + static struct attribute *ddr_perf_events_attrs[] = { /* counter0 cycles event */ IMX9_DDR_PMU_EVENT_ATTR(cycles, 0), @@ -204,6 +227,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, ID(2, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, ID(2, 72)), IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, ID(2, 73)), /* imx93 specific*/ + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_beat_filt, ID(2, 73)), /* imx95 specific*/ /* counter3 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, ID(3, 64)), @@ -216,6 +240,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, ID(3, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, ID(3, 72)), IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, ID(3, 73)), /* imx93 specific*/ + 
IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt2, ID(3, 73)), /* imx95 specific*/ /* counter4 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, ID(4, 64)), @@ -228,6 +253,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, ID(4, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, ID(4, 72)), IMX93_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, ID(4, 73)), /* imx93 specific*/ + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt1, ID(4, 73)), /* imx95 specific*/ /* counter5 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, ID(5, 64)), @@ -239,6 +265,7 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, ID(5, 70)), IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, ID(5, 71)), IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, ID(5, 72)), + IMX95_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt0, ID(5, 73)), /* imx95 specific*/ /* counter6 specific events */ IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, ID(6, 64)), @@ -432,6 +459,57 @@ static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, writel_relaxed(pmcfg2, pmu->base + PMCFG2); } +static void imx95_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event, + int counter, int axi_id, int axi_mask) +{ + u32 pmcfg1, pmcfg, offset = 0; + + pmcfg1 = readl_relaxed(pmu->base + PMCFG1); + + if (event == 73) { + switch (counter) { + case 2: + pmcfg1 |= MX95_PMCFG1_WR_BEAT_FILT_EN; + offset = PMCFG3; + break; + case 3: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG4; + break; + case 4: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG5; + break; + case 5: + pmcfg1 |= MX95_PMCFG1_RD_BEAT_FILT_EN; + offset = PMCFG6; + break; + } + } else { + switch (counter) { + case 2: + pmcfg1 &= ~MX95_PMCFG1_WR_BEAT_FILT_EN; + break; + case 3: + case 4: + case 5: + pmcfg1 &= ~MX95_PMCFG1_RD_BEAT_FILT_EN; + break; + } + } + + writel_relaxed(pmcfg1, pmu->base + PMCFG1); + + if (offset) { + pmcfg = readl_relaxed(pmu->base + offset); + pmcfg &= ~(FIELD_PREP(MX95_PMCFG_ID_MASK, 0x3FF) | + FIELD_PREP(MX95_PMCFG_ID, 0x3FF)); + pmcfg |= (FIELD_PREP(MX95_PMCFG_ID_MASK, axi_mask) | + FIELD_PREP(MX95_PMCFG_ID, axi_id)); + writel_relaxed(pmcfg, pmu->base + offset); + } +} + static void ddr_perf_event_update(struct perf_event *event) @@ -541,8 +619,13 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) hwc->idx = counter; hwc->state |= PERF_HES_STOPPED; - /* read trans, write trans, read beat */ - imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + if (is_imx93(pmu)) + /* read trans, write trans, read beat */ + imx93_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); + + if (is_imx95(pmu)) + /* write beat, read beat2, read beat1, read beat */ + imx95_ddr_perf_monitor_config(pmu, event_id, counter, cfg1, cfg2); if (flags & PERF_EF_START) ddr_perf_event_start(event, flags); -- cgit v1.2.3 From 46800e38ef0e45a73839ded0347885ada2b4f7bb Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 1 Jul 2024 10:11:32 +1000 Subject: arm64: Kconfig: Fix dependencies to enable ACPI_HOTPLUG_CPU Both ACPI_PROCESSOR and HOTPLUG_CPU are needed by ACPI_HOTPLUG_CPU. Otherwise, we can get a compile error with the following configurations. 
CONFIG_HOTPLUG_CPU=n CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y arch/arm64/kernel/smp.c: In function ‘arch_unregister_cpu’: arch/arm64/kernel/smp.c:563:9: error: implicit declaration of \ function ‘unregister_cpu’; did you mean ‘register_cpu’? \ [-Werror=implicit-function-declaration] 563 | unregister_cpu(c); | ^~~~~~~~~~~~~~ | register_cpu Fix it by enabling ACPI_HOTPLUG_CPU when both ACPI_PROCESSOR and HOTPLUG_CPU are enabled, consistent with other architectures like x86 and loongarch. Fixes: 9d0873892f4d ("arm64: Kconfig: Enable hotplug CPU on arm64 if ACPI_PROCESSOR is enabled.") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406300437.XnuW0n34-lkp@intel.com/ Signed-off-by: Gavin Shan Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240701001132.1585153-1-gshan@redhat.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e8f2ef2312db..ff17c5705324 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -5,7 +5,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI - select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR + select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if (ACPI && PCI) -- cgit v1.2.3 From 81e15ca3e523a508d62806fe681c1d289361ca16 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:25 -0600 Subject: perf: arm_pmuv3: Avoid assigning fixed cycle counter with threshold If the user has requested a counting threshold for the CPU cycles event, then the fixed cycle counter can't be assigned as it lacks threshold support. Currently, the thresholds will work or not randomly depending on which counter the event is assigned. While using thresholds for CPU cycles doesn't make much sense, it can be useful for testing purposes. Fixes: 816c26754447 ("arm64: perf: Add support for event counting threshold") Signed-off-by: Rob Herring (Arm) Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-1-c9784b4f4065@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 9f46ce0c459e..02b2ac4cd297 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -338,6 +338,11 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event) return ATTR_CFG_GET_FLD(&event->attr, rdpmc); } +static u32 armv8pmu_event_get_threshold(struct perf_event_attr *attr) +{ + return ATTR_CFG_GET_FLD(attr, threshold); +} + static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr) { u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare); @@ -941,7 +946,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. 
*/ - if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !armv8pmu_event_get_threshold(&event->attr)) { if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) return ARMV8_IDX_CYCLE_COUNTER; else if (armv8pmu_event_is_64bit(event) && @@ -1033,7 +1039,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will * be 0 and will also trigger this check, preventing it from being used. */ - th = ATTR_CFG_GET_FLD(attr, threshold); + th = armv8pmu_event_get_threshold(attr); if (th > threshold_max(cpu_pmu)) { pr_debug("PMU event threshold exceeds max value\n"); return -EINVAL; -- cgit v1.2.3 From 598c1a2d9f4ba8a04cf92af8e988052179e31199 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:26 -0600 Subject: perf: arm_pmuv3: Drop unnecessary IS_ENABLED(CONFIG_ARM64) check The IS_ENABLED(CONFIG_ARM64) check for threshold support is unnecessary. The purpose is to not enable thresholds on arm32, but if threshold is non-zero, the check against threshold_max() just above here will have errored out because threshold_max() is always 0 on arm32. Signed-off-by: Rob Herring (Arm) Acked-by: Mark rutland Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-2-c9784b4f4065@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 02b2ac4cd297..d6248493b5c6 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1045,7 +1045,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return -EINVAL; } - if (IS_ENABLED(CONFIG_ARM64) && th) { + if (th) { config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th); config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC, armv8pmu_event_threshold_control(attr)); -- cgit v1.2.3 From 8d75537bebfa14fcd493b1f840b07a8cff5a640d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:27 -0600 Subject: perf/arm: Move 32-bit PMU drivers to drivers/perf/ It is preferred to put drivers under drivers/ rather than under arch/. The PMU drivers also depend on arm_pmu.c, so it's better to place them all together. 
Acked-by: Mark Rutland Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-3-c9784b4f4065@kernel.org Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 2 - arch/arm/kernel/perf_event_v6.c | 448 -------- arch/arm/kernel/perf_event_v7.c | 2005 ----------------------------------- arch/arm/kernel/perf_event_xscale.c | 748 ------------- drivers/perf/Kconfig | 12 + drivers/perf/Makefile | 3 + drivers/perf/arm_v6_pmu.c | 445 ++++++++ drivers/perf/arm_v7_pmu.c | 2002 ++++++++++++++++++++++++++++++++++ drivers/perf/arm_xscale_pmu.c | 745 +++++++++++++ 9 files changed, 3207 insertions(+), 3203 deletions(-) delete mode 100644 arch/arm/kernel/perf_event_v6.c delete mode 100644 arch/arm/kernel/perf_event_v7.c delete mode 100644 arch/arm/kernel/perf_event_xscale.c create mode 100644 drivers/perf/arm_v6_pmu.c create mode 100644 drivers/perf/arm_v7_pmu.c create mode 100644 drivers/perf/arm_xscale_pmu.c diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89a77e3f51d2..aaae31b8c4a5 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -78,8 +78,6 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ - perf_event_v7.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_VDSO) += vdso.o diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c deleted file mode 100644 index d9fd53841591..000000000000 --- a/arch/arm/kernel/perf_event_v6.c +++ /dev/null @@ -1,448 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv6 Performance counter handling code. - * - * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. 
- */ - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) - -#include -#include - -#include -#include -#include - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 0, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. - */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - - /* - * The ARM performance counters can count micro DTLB misses, micro ITLB - * misses and main TLB misses. There isn't an event for TLB misses, so - * use the micro misses here and if users want the main TLB misses they - * can use a raw counter. 
- */ - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u64 armv6pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -static void armv6pmu_enable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - 
WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); -} - -static irqreturn_t -armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv6pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); -} - -static void armv6pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); -} - -static int -armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) - return ARMV6_COUNTER1; - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) - return ARMV6_COUNTER0; - - /* The counters are all in use. 
*/ - return -EAGAIN; - } -} - -static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -static void armv6pmu_disable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); -} - -static int armv6_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv6_perf_map, - &armv6_perf_cache_map, 0xFF); -} - -static void armv6pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = armv6pmu_handle_irq; - cpu_pmu->enable = armv6pmu_enable_event; - cpu_pmu->disable = armv6pmu_disable_event; - cpu_pmu->read_counter = armv6pmu_read_counter; - cpu_pmu->write_counter = armv6pmu_write_counter; - cpu_pmu->get_event_idx = armv6pmu_get_event_idx; - cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; - cpu_pmu->start = armv6pmu_start; - cpu_pmu->stop = armv6pmu_stop; - cpu_pmu->map_event = armv6_map_event; - cpu_pmu->num_events = 3; -} - -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1136"; - return 0; -} - -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1156"; - return 0; -} - -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1176"; - return 0; -} - -static const struct of_device_id armv6_pmu_of_device_ids[] = { - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - { /* sentinel value */ } -}; - -static const struct pmu_probe_info armv6_pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - { /* sentinel value */ } -}; - -static int armv6_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, - armv6_pmu_probe_table); -} - -static struct platform_driver armv6_pmu_driver = { - .driver = { - .name = "armv6-pmu", - .of_match_table = armv6_pmu_of_device_ids, - }, - .probe = armv6_pmu_device_probe, -}; - -builtin_platform_driver(armv6_pmu_driver); -#endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c deleted file mode 100644 index a3322e2b3ea4..000000000000 --- a/arch/arm/kernel/perf_event_v7.c +++ /dev/null @@ -1,2005 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. 
- * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -#ifdef CONFIG_CPU_V7 - -#include -#include -#include -#include -#include "../vfp/vfpinstr.h" - -#include -#include -#include - -/* - * Common ARMv7 event types - * - * Note: An implementation may not be able to count all of these events - * but the encodings are considered to be `reserved' in the case that - * they are not available. - */ -#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 -#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 -#define ARMV7_PERFCTR_ITLB_REFILL 0x02 -#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 -#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 -#define ARMV7_PERFCTR_DTLB_REFILL 0x05 -#define ARMV7_PERFCTR_MEM_READ 0x06 -#define ARMV7_PERFCTR_MEM_WRITE 0x07 -#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 -#define ARMV7_PERFCTR_EXC_TAKEN 0x09 -#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A -#define ARMV7_PERFCTR_CID_WRITE 0x0B - -/* - * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all (taken) branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ -#define ARMV7_PERFCTR_PC_WRITE 0x0C -#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D -#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E -#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F -#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 -#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 -#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 - -/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). 
*/ -#define ARMV7_PERFCTR_MEM_ACCESS 0x13 -#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 -#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 -#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 -#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 -#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 -#define ARMV7_PERFCTR_BUS_ACCESS 0x19 -#define ARMV7_PERFCTR_MEM_ERROR 0x1A -#define ARMV7_PERFCTR_INSTR_SPEC 0x1B -#define ARMV7_PERFCTR_TTBR_WRITE 0x1C -#define ARMV7_PERFCTR_BUS_CYCLES 0x1D - -#define ARMV7_PERFCTR_CPU_CYCLES 0xFF - -/* ARMv7 Cortex-A8 specific event types */ -#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 -#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 -#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 -#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 - -/* ARMv7 Cortex-A9 specific event types */ -#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 -#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 -#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 - -/* ARMv7 Cortex-A5 specific event types */ -#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 -#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 - -/* ARMv7 Cortex-A15 specific event types */ -#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 -#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 -#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 -#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 - -#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C -#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D - -#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 -#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 -#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 -#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 - -#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 - -/* ARMv7 Cortex-A12 specific event types */ -#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 -#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 - -#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 -#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 - -#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 - -#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 - -/* ARMv7 Krait specific event types */ -#define KRAIT_PMRESR0_GROUP0 0xcc -#define KRAIT_PMRESR1_GROUP0 0xd0 -#define KRAIT_PMRESR2_GROUP0 0xd4 -#define KRAIT_VPMRESR0_GROUP0 0xd8 - -#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 -#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 - -#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 -#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 - -/* ARMv7 Scorpion specific event types */ -#define SCORPION_LPM0_GROUP0 0x4c -#define SCORPION_LPM1_GROUP0 0x50 -#define SCORPION_LPM2_GROUP0 0x54 -#define SCORPION_L2LPM_GROUP0 0x58 -#define SCORPION_VLPM_GROUP0 0x5c - -#define SCORPION_ICACHE_ACCESS 0x10053 -#define SCORPION_ICACHE_MISS 0x10052 - -#define SCORPION_DTLB_ACCESS 0x12013 -#define SCORPION_DTLB_MISS 0x12012 - -#define SCORPION_ITLB_MISS 0x12021 - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. 
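 *
 * (Illustration, not part of the original file: each cache-map entry
 * below is indexed as [cache][operation][result], so a perf request for
 * an L1D read miss resolves to ARMV7_PERFCTR_L1_DCACHE_REFILL, and the
 * corresponding write-miss entry deliberately names the same event.)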
- */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. - */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
- */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A5 HW events mapping - */ -static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - /* - * The prefetch counters don't differentiate between the I side and the - * D side. 
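 * (Illustration, not part of the original file: this is why the L1I
 * prefetch entries below reuse ARMV7_A5_PERFCTR_PREFETCH_LINEFILL and
 * ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, the same events used for the
 * L1D prefetch entries above.)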
- */ - [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A15 HW events mapping - */ -static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, - - /* - * Not all performance counters differentiate between read and write - * accesses/misses so we're not always strictly correct, but it's the - * best we can do. Writes and reads get combined in these cases. 
- */ - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A7 HW events mapping - */ -static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
- */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A12 HW events mapping - */ -static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - /* - * Not all performance counters differentiate between read and write - * accesses/misses so we're not always strictly correct, but it's the - * best we can do. Writes and reads get combined in these cases. 
- */ - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Krait HW events mapping - */ -static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
*/
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Scorpion HW events mapping
- */
-static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
- /*
- * Only ITLB misses and DTLB refills are supported. If users want the
- * DTLB refill misses, a raw counter must be used.
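 * (Illustration, not part of the original file: e.g. SCORPION_DTLB_MISS
 * can still be requested directly as raw event number 0x12012.)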
- */ - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -PMU_FORMAT_ATTR(event, "config:0-7"); - -static struct attribute *armv7_pmu_format_attrs[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group armv7_pmu_format_attr_group = { - .name = "format", - .attrs = armv7_pmu_format_attrs, -}; - -#define ARMV7_EVENT_ATTR_RESOLVE(m) #m -#define ARMV7_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ - "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) - -ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); -ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL); -ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); -ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); -ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); -ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); -ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); -ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); -ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); -ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); -ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); -ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); -ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); -ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); -ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); -ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); -ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); -ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); -ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); - -static struct attribute *armv7_pmuv1_event_attrs[] = { - &armv7_event_attr_sw_incr.attr.attr, - &armv7_event_attr_l1i_cache_refill.attr.attr, - &armv7_event_attr_l1i_tlb_refill.attr.attr, - &armv7_event_attr_l1d_cache_refill.attr.attr, - &armv7_event_attr_l1d_cache.attr.attr, - &armv7_event_attr_l1d_tlb_refill.attr.attr, - &armv7_event_attr_ld_retired.attr.attr, - &armv7_event_attr_st_retired.attr.attr, - &armv7_event_attr_inst_retired.attr.attr, - &armv7_event_attr_exc_taken.attr.attr, - &armv7_event_attr_exc_return.attr.attr, - &armv7_event_attr_cid_write_retired.attr.attr, - &armv7_event_attr_pc_write_retired.attr.attr, - &armv7_event_attr_br_immed_retired.attr.attr, - &armv7_event_attr_br_return_retired.attr.attr, - &armv7_event_attr_unaligned_ldst_retired.attr.attr, - &armv7_event_attr_br_mis_pred.attr.attr, - &armv7_event_attr_cpu_cycles.attr.attr, - &armv7_event_attr_br_pred.attr.attr, - NULL, -}; - -static struct attribute_group armv7_pmuv1_events_attr_group = { - .name = "events", - .attrs = armv7_pmuv1_event_attrs, -}; - -ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); -ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); -ARMV7_EVENT_ATTR(l1d_cache_wb, 
ARMV7_PERFCTR_L1_DCACHE_WB); -ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); -ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); -ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); -ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); -ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); -ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); -ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); -ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); - -static struct attribute *armv7_pmuv2_event_attrs[] = { - &armv7_event_attr_sw_incr.attr.attr, - &armv7_event_attr_l1i_cache_refill.attr.attr, - &armv7_event_attr_l1i_tlb_refill.attr.attr, - &armv7_event_attr_l1d_cache_refill.attr.attr, - &armv7_event_attr_l1d_cache.attr.attr, - &armv7_event_attr_l1d_tlb_refill.attr.attr, - &armv7_event_attr_ld_retired.attr.attr, - &armv7_event_attr_st_retired.attr.attr, - &armv7_event_attr_inst_retired.attr.attr, - &armv7_event_attr_exc_taken.attr.attr, - &armv7_event_attr_exc_return.attr.attr, - &armv7_event_attr_cid_write_retired.attr.attr, - &armv7_event_attr_pc_write_retired.attr.attr, - &armv7_event_attr_br_immed_retired.attr.attr, - &armv7_event_attr_br_return_retired.attr.attr, - &armv7_event_attr_unaligned_ldst_retired.attr.attr, - &armv7_event_attr_br_mis_pred.attr.attr, - &armv7_event_attr_cpu_cycles.attr.attr, - &armv7_event_attr_br_pred.attr.attr, - &armv7_event_attr_mem_access.attr.attr, - &armv7_event_attr_l1i_cache.attr.attr, - &armv7_event_attr_l1d_cache_wb.attr.attr, - &armv7_event_attr_l2d_cache.attr.attr, - &armv7_event_attr_l2d_cache_refill.attr.attr, - &armv7_event_attr_l2d_cache_wb.attr.attr, - &armv7_event_attr_bus_access.attr.attr, - &armv7_event_attr_memory_error.attr.attr, - &armv7_event_attr_inst_spec.attr.attr, - &armv7_event_attr_ttbr_write_retired.attr.attr, - &armv7_event_attr_bus_cycles.attr.attr, - NULL, -}; - -static struct attribute_group armv7_pmuv2_events_attr_group = { - .name = "events", - .attrs = armv7_pmuv2_event_attrs, -}; - -/* - * Perf Events' indices - */ -#define ARMV7_IDX_CYCLE_COUNTER 0 -#define ARMV7_IDX_COUNTER0 1 -#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) - -#define ARMV7_MAX_COUNTERS 32 -#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Perf Event to low level counters mapping - */ -#define ARMV7_IDX_TO_COUNTER(x) \ - (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK) - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ -#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv2 - */ -#define ARMV7_EXCLUDE_PL1 BIT(31) -#define ARMV7_EXCLUDE_USER BIT(30) -#define ARMV7_INCLUDE_HYP BIT(27) - 
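/*
 * Illustrative sketch, not part of the original file: how an event number
 * from the tables above and the PMUv2 exclusion bits compose into a
 * PMXEVTYPER value. The helper name is hypothetical; in the deleted code
 * the same composition happens across armv7pmu_set_event_filter(), which
 * stores the exclusion bits in hwc->config_base, and
 * armv7_pmnc_write_evtsel(), which applies the final writable-bits mask.
 */
static u32 armv7_example_build_evtype(u32 event, bool exclude_user,
				      bool exclude_kernel)
{
	u32 val = event & ARMV7_EVTYPE_EVENT;	/* EVENT field, bits [7:0] */

	if (exclude_user)
		val |= ARMV7_EXCLUDE_USER;	/* don't count at PL0 */
	if (exclude_kernel)
		val |= ARMV7_EXCLUDE_PL1;	/* don't count at PL1 */

	return val & ARMV7_EVTYPE_MASK;		/* keep only writable bits */
}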
-/* - * Secure debug enable reg - */ -#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ - -static inline u32 armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(u32 val) -{ - val &= ARMV7_PMNC_MASK; - isb(); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(u32 pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV7_IDX_CYCLE_COUNTER && - idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); -} - -static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) -{ - return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); -} - -static inline void armv7_pmnc_select_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); - isb(); -} - -static inline u64 armv7pmu_read_counter(struct perf_event *event) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - u32 value = 0; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - } else { - armv7_pmnc_select_counter(idx); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); - } - - return value; -} - -static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); - } else { - armv7_pmnc_select_counter(idx); - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); - } -} - -static inline void armv7_pmnc_write_evtsel(int idx, u32 val) -{ - armv7_pmnc_select_counter(idx); - val &= ARMV7_EVTYPE_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); -} - -static inline void armv7_pmnc_enable_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_disable_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_enable_intens(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_disable_intens(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); - isb(); - /* Clear the overflow flag in case an interrupt is pending. 
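 * (Presumably so that a stale overflow bit cannot raise a spurious
 * interrupt once this counter's interrupt is enabled again later.)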
*/ - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); - isb(); -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) -{ - u32 val; - unsigned int cnt; - - pr_info("PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - pr_info("PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - pr_info("CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - pr_info("INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - pr_info("FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - pr_info("SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - pr_info("CCNT =0x%08x\n", val); - - for (cnt = ARMV7_IDX_COUNTER0; - cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - pr_info("CNT[%d] count =0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - pr_info("CNT[%d] evtsel=0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); - } -} -#endif - -static void armv7pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return; - } - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We only need to set the event for the cycle counter if we - * have the ability to perform event filtering. - */ - if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); -} - -static void armv7pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return; - } - - /* - * Disable counter and interrupt - */ - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); -} - -static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - u32 pmnc; - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? 
- */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(struct arm_pmu *cpu_pmu) -{ - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); -} - -static void armv7pmu_stop(struct arm_pmu *cpu_pmu) -{ - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); -} - -static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; - - /* Always place a cycle counter into the cycle counter. */ - if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_IDX_CYCLE_COUNTER; - } - - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; -} - -static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -/* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. - */ -static int armv7pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - - if (attr->exclude_idle) { - pr_debug("ARM performance counters do not support mode exclusion\n"); - return -EOPNOTSUPP; - } - if (attr->exclude_user) - config_base |= ARMV7_EXCLUDE_USER; - if (attr->exclude_kernel) - config_base |= ARMV7_EXCLUDE_PL1; - if (!attr->exclude_hv) - config_base |= ARMV7_INCLUDE_HYP; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base = config_base; - - return 0; -} - -static void armv7pmu_reset(void *info) -{ - struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events, val; - - if (cpu_pmu->secure_access) { - asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); - val |= ARMV7_SDER_SUNIDEN; - asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); - } - - /* The counter and interrupt enable registers are unknown at reset. 
*/ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv7_pmnc_disable_counter(idx); - armv7_pmnc_disable_intens(idx); - } - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); -} - -static int armv7_a8_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a8_perf_map, - &armv7_a8_perf_cache_map, 0xFF); -} - -static int armv7_a9_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a9_perf_map, - &armv7_a9_perf_cache_map, 0xFF); -} - -static int armv7_a5_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a5_perf_map, - &armv7_a5_perf_cache_map, 0xFF); -} - -static int armv7_a15_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a15_perf_map, - &armv7_a15_perf_cache_map, 0xFF); -} - -static int armv7_a7_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a7_perf_map, - &armv7_a7_perf_cache_map, 0xFF); -} - -static int armv7_a12_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a12_perf_map, - &armv7_a12_perf_cache_map, 0xFF); -} - -static int krait_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &krait_perf_map, - &krait_perf_cache_map, 0xFFFFF); -} - -static int krait_map_event_no_branch(struct perf_event *event) -{ - return armpmu_map_event(event, &krait_perf_map_no_branch, - &krait_perf_cache_map, 0xFFFFF); -} - -static int scorpion_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &scorpion_perf_map, - &scorpion_perf_cache_map, 0xFFFFF); -} - -static void armv7pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = armv7pmu_handle_irq; - cpu_pmu->enable = armv7pmu_enable_event; - cpu_pmu->disable = armv7pmu_disable_event; - cpu_pmu->read_counter = armv7pmu_read_counter; - cpu_pmu->write_counter = armv7pmu_write_counter; - cpu_pmu->get_event_idx = armv7pmu_get_event_idx; - cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; - cpu_pmu->start = armv7pmu_start; - cpu_pmu->stop = armv7pmu_stop; - cpu_pmu->reset = armv7pmu_reset; -}; - -static void armv7_read_num_pmnc_events(void *info) -{ - int *nb_cnt = info; - - /* Read the nb of CNTx counters supported from PMNC */ - *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter */ - *nb_cnt += 1; -} - -static int armv7_probe_num_events(struct arm_pmu *arm_pmu) -{ - return smp_call_function_any(&arm_pmu->supported_cpus, - armv7_read_num_pmnc_events, - &arm_pmu->num_events, 1); -} - -static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a8"; - cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a9"; - cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a5"; - cpu_pmu->map_event = armv7_a5_map_event; - 
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a15"; - cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a7"; - cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a12"; - cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) -{ - int ret = armv7_a12_pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a17"; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return ret; -} - -/* - * Krait Performance Monitor Region Event Selection Register (PMRESRn) - * - * 31 30 24 16 8 0 - * +--------------------------------+ - * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 - * +--------------------------------+ - * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 - * +--------------------------------+ - * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 - * +--------------------------------+ - * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? - * +--------------------------------+ - * EN | G=3 | G=2 | G=1 | G=0 - * - * Event Encoding: - * - * hwc->config_base = 0xNRCCG - * - * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) - * R = region register - * CC = class of events the group G is choosing from - * G = group or particular event - * - * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 - * - * A region (R) corresponds to a piece of the CPU (execution unit, instruction - * unit, etc.) while the event code (CC) corresponds to a particular class of - * events (interrupts for example). An event code is broken down into - * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for - * example). 
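 *
 * (Worked decode of the example above, not part of the original file,
 * using the EVENT_* accessors defined just below: for 0x12021,
 * N = 1 so it is a Krait CPU event, R = (0x12021 >> 12) & 0xf = 2,
 * CC = (0x12021 >> 4) & 0xff = 0x02, and G = 0x12021 & 0xf = 1,
 * i.e. PMRESR2, class 2, group 1, as stated above.)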
- */ - -#define KRAIT_EVENT (1 << 16) -#define VENUM_EVENT (2 << 16) -#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) -#define PMRESRn_EN BIT(31) - -#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ -#define EVENT_GROUP(event) ((event) & 0xf) /* G */ -#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ -#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ -#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ - -static u32 krait_read_pmresrn(int n) -{ - u32 val; - - switch (n) { - case 0: - asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); - break; - case 1: - asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); - break; - case 2: - asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); - break; - default: - BUG(); /* Should be validated in krait_pmu_get_event_idx() */ - } - - return val; -} - -static void krait_write_pmresrn(int n, u32 val) -{ - switch (n) { - case 0: - asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); - break; - case 1: - asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); - break; - case 2: - asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); - break; - default: - BUG(); /* Should be validated in krait_pmu_get_event_idx() */ - } -} - -static u32 venum_read_pmresr(void) -{ - u32 val; - asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); - return val; -} - -static void venum_write_pmresr(u32 val) -{ - asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); -} - -static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) -{ - u32 venum_new_val; - u32 fp_new_val; - - BUG_ON(preemptible()); - /* CPACR Enable CP10 and CP11 access */ - *venum_orig_val = get_copro_access(); - venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); - set_copro_access(venum_new_val); - - /* Enable FPEXC */ - *fp_orig_val = fmrx(FPEXC); - fp_new_val = *fp_orig_val | FPEXC_EN; - fmxr(FPEXC, fp_new_val); -} - -static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) -{ - BUG_ON(preemptible()); - /* Restore FPEXC */ - fmxr(FPEXC, fp_orig_val); - isb(); - /* Restore CPACR */ - set_copro_access(venum_orig_val); -} - -static u32 krait_get_pmresrn_event(unsigned int region) -{ - static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, - KRAIT_PMRESR1_GROUP0, - KRAIT_PMRESR2_GROUP0 }; - return pmresrn_table[region]; -} - -static void krait_evt_setup(int idx, u32 config_base) -{ - u32 val; - u32 mask; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - unsigned int code = EVENT_CODE(config_base); - unsigned int group_shift; - bool venum_event = EVENT_VENUM(config_base); - - group_shift = group * 8; - mask = 0xff << group_shift; - - /* Configure evtsel for the region and group */ - if (venum_event) - val = KRAIT_VPMRESR0_GROUP0; - else - val = krait_get_pmresrn_event(region); - val += group; - /* Mix in mode-exclusion bits */ - val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); - armv7_pmnc_write_evtsel(idx, val); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = krait_read_pmresrn(region); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - krait_write_pmresrn(region, val); - } -} - -static u32 clear_pmresrn_group(u32 val, int group) -{ - u32 mask; - int group_shift; - - group_shift = group * 8; - mask = 0xff << group_shift; 
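	/* e.g. group = 2 gives group_shift = 16 and mask = 0x00ff0000 */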
- val &= ~mask; - - /* Don't clear enable bit if entire region isn't disabled */ - if (val & ~PMRESRn_EN) - return val |= PMRESRn_EN; - - return 0; -} - -static void krait_clearpmu(u32 config_base) -{ - u32 val; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - bool venum_event = EVENT_VENUM(config_base); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val = clear_pmresrn_group(val, group); - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = krait_read_pmresrn(region); - val = clear_pmresrn_group(val, group); - krait_write_pmresrn(region, val); - } -} - -static void krait_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* Disable counter and interrupt */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* - * Clear pmresr code (if destined for PMNx counters) - */ - if (hwc->config_base & KRAIT_EVENT_MASK) - krait_clearpmu(hwc->config_base); - - /* Disable interrupt for this counter */ - armv7_pmnc_disable_intens(idx); -} - -static void krait_pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We set the event for the cycle counter because we - * have the ability to perform event filtering. - */ - if (hwc->config_base & KRAIT_EVENT_MASK) - krait_evt_setup(idx, hwc->config_base); - else - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* Enable interrupt for this counter */ - armv7_pmnc_enable_intens(idx); - - /* Enable counter */ - armv7_pmnc_enable_counter(idx); -} - -static void krait_pmu_reset(void *info) -{ - u32 vval, fval; - struct arm_pmu *cpu_pmu = info; - u32 idx, nb_cnt = cpu_pmu->num_events; - - armv7pmu_reset(info); - - /* Clear all pmresrs */ - krait_write_pmresrn(0, 0); - krait_write_pmresrn(1, 0); - krait_write_pmresrn(2, 0); - - venum_pre_pmresr(&vval, &fval); - venum_write_pmresr(0); - venum_post_pmresr(vval, fval); - - /* Reset PMxEVNCTCR to sane default */ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv7_pmnc_select_counter(idx); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - } - -} - -static int krait_event_to_bit(struct perf_event *event, unsigned int region, - unsigned int group) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - - if (hwc->config_base & VENUM_EVENT) - bit = KRAIT_VPMRESR0_GROUP0; - else - bit = krait_get_pmresrn_event(region); - bit -= krait_get_pmresrn_event(0); - bit += group; - /* - * Lower bits are reserved for use by the counters (see - * armv7pmu_get_event_idx() for more info) - */ - bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; - - return bit; -} - -/* - * We check for column exclusion constraints here. - * Two events cant use the same group within a pmresr register. 
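 * (Illustration, not part of the original file: events 0x12021 and
 * 0x12031 differ only in their class code but both claim PMRESR2
 * group 1, so whichever is scheduled second gets -EAGAIN.)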
- */ -static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - int bit = -1; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int code = EVENT_CODE(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool krait_event = EVENT_CPU(hwc->config_base); - - if (venum_event || krait_event) { - /* Ignore invalid events */ - if (group > 3 || region > 2) - return -EINVAL; - if (venum_event && (code & 0xe0)) - return -EINVAL; - - bit = krait_event_to_bit(event, region, group); - if (test_and_set_bit(bit, cpuc->used_mask)) - return -EAGAIN; - } - - idx = armv7pmu_get_event_idx(cpuc, event); - if (idx < 0 && bit >= 0) - clear_bit(bit, cpuc->used_mask); - - return idx; -} - -static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool krait_event = EVENT_CPU(hwc->config_base); - - armv7pmu_clear_event_idx(cpuc, event); - if (venum_event || krait_event) { - bit = krait_event_to_bit(event, region, group); - clear_bit(bit, cpuc->used_mask); - } -} - -static int krait_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_krait"; - /* Some early versions of Krait don't support PC write events */ - if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, - "qcom,no-pc-write")) - cpu_pmu->map_event = krait_map_event_no_branch; - else - cpu_pmu->map_event = krait_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->reset = krait_pmu_reset; - cpu_pmu->enable = krait_pmu_enable_event; - cpu_pmu->disable = krait_pmu_disable_event; - cpu_pmu->get_event_idx = krait_pmu_get_event_idx; - cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -/* - * Scorpion Local Performance Monitor Register (LPMn) - * - * 31 30 24 16 8 0 - * +--------------------------------+ - * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 - * +--------------------------------+ - * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 - * +--------------------------------+ - * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 - * +--------------------------------+ - * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 - * +--------------------------------+ - * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? - * +--------------------------------+ - * EN | G=3 | G=2 | G=1 | G=0 - * - * - * Event Encoding: - * - * hwc->config_base = 0xNRCCG - * - * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) - * R = region register - * CC = class of events the group G is choosing from - * G = group or particular event - * - * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 - * - * A region (R) corresponds to a piece of the CPU (execution unit, instruction - * unit, etc.) while the event code (CC) corresponds to a particular class of - * events (interrupts for example). An event code is broken down into - * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for - * example). 
- */ - -static u32 scorpion_read_pmresrn(int n) -{ - u32 val; - - switch (n) { - case 0: - asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); - break; - case 1: - asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); - break; - case 2: - asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); - break; - case 3: - asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); - break; - default: - BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ - } - - return val; -} - -static void scorpion_write_pmresrn(int n, u32 val) -{ - switch (n) { - case 0: - asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); - break; - case 1: - asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); - break; - case 2: - asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); - break; - case 3: - asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); - break; - default: - BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ - } -} - -static u32 scorpion_get_pmresrn_event(unsigned int region) -{ - static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, - SCORPION_LPM1_GROUP0, - SCORPION_LPM2_GROUP0, - SCORPION_L2LPM_GROUP0 }; - return pmresrn_table[region]; -} - -static void scorpion_evt_setup(int idx, u32 config_base) -{ - u32 val; - u32 mask; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - unsigned int code = EVENT_CODE(config_base); - unsigned int group_shift; - bool venum_event = EVENT_VENUM(config_base); - - group_shift = group * 8; - mask = 0xff << group_shift; - - /* Configure evtsel for the region and group */ - if (venum_event) - val = SCORPION_VLPM_GROUP0; - else - val = scorpion_get_pmresrn_event(region); - val += group; - /* Mix in mode-exclusion bits */ - val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); - armv7_pmnc_write_evtsel(idx, val); - - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = scorpion_read_pmresrn(region); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - scorpion_write_pmresrn(region, val); - } -} - -static void scorpion_clearpmu(u32 config_base) -{ - u32 val; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - bool venum_event = EVENT_VENUM(config_base); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val = clear_pmresrn_group(val, group); - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = scorpion_read_pmresrn(region); - val = clear_pmresrn_group(val, group); - scorpion_write_pmresrn(region, val); - } -} - -static void scorpion_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* Disable counter and interrupt */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* - * Clear pmresr code (if destined for PMNx counters) - */ - if (hwc->config_base & KRAIT_EVENT_MASK) - scorpion_clearpmu(hwc->config_base); - - /* Disable interrupt for this counter */ - armv7_pmnc_disable_intens(idx); -} - -static void scorpion_pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* - * Enable counter and interrupt, and set the counter to count - * 
the event that we're interested in. - */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't set the event for the cycle counter because we - * don't have the ability to perform event filtering. - */ - if (hwc->config_base & KRAIT_EVENT_MASK) - scorpion_evt_setup(idx, hwc->config_base); - else if (idx != ARMV7_IDX_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* Enable interrupt for this counter */ - armv7_pmnc_enable_intens(idx); - - /* Enable counter */ - armv7_pmnc_enable_counter(idx); -} - -static void scorpion_pmu_reset(void *info) -{ - u32 vval, fval; - struct arm_pmu *cpu_pmu = info; - u32 idx, nb_cnt = cpu_pmu->num_events; - - armv7pmu_reset(info); - - /* Clear all pmresrs */ - scorpion_write_pmresrn(0, 0); - scorpion_write_pmresrn(1, 0); - scorpion_write_pmresrn(2, 0); - scorpion_write_pmresrn(3, 0); - - venum_pre_pmresr(&vval, &fval); - venum_write_pmresr(0); - venum_post_pmresr(vval, fval); - - /* Reset PMxEVNCTCR to sane default */ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv7_pmnc_select_counter(idx); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - } -} - -static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, - unsigned int group) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - - if (hwc->config_base & VENUM_EVENT) - bit = SCORPION_VLPM_GROUP0; - else - bit = scorpion_get_pmresrn_event(region); - bit -= scorpion_get_pmresrn_event(0); - bit += group; - /* - * Lower bits are reserved for use by the counters (see - * armv7pmu_get_event_idx() for more info) - */ - bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; - - return bit; -} - -/* - * We check for column exclusion constraints here. - * Two events can't use the same group within a pmresr register.
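- * For example, two events that both decode to region 0, group 2 would - * have to program the same 8-bit field of the same pmresr register, so - * only one of them can be scheduled at a time.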
- */ -static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - int bit = -1; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool scorpion_event = EVENT_CPU(hwc->config_base); - - if (venum_event || scorpion_event) { - /* Ignore invalid events */ - if (group > 3 || region > 3) - return -EINVAL; - - bit = scorpion_event_to_bit(event, region, group); - if (test_and_set_bit(bit, cpuc->used_mask)) - return -EAGAIN; - } - - idx = armv7pmu_get_event_idx(cpuc, event); - if (idx < 0 && bit >= 0) - clear_bit(bit, cpuc->used_mask); - - return idx; -} - -static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool scorpion_event = EVENT_CPU(hwc->config_base); - - armv7pmu_clear_event_idx(cpuc, event); - if (venum_event || scorpion_event) { - bit = scorpion_event_to_bit(event, region, group); - clear_bit(bit, cpuc->used_mask); - } -} - -static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_scorpion"; - cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->reset = scorpion_pmu_reset; - cpu_pmu->enable = scorpion_pmu_enable_event; - cpu_pmu->disable = scorpion_pmu_disable_event; - cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; - cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_scorpion_mp"; - cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->reset = scorpion_pmu_reset; - cpu_pmu->enable = scorpion_pmu_enable_event; - cpu_pmu->disable = scorpion_pmu_disable_event; - cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; - cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -static const struct of_device_id armv7_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, - {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, - {}, -}; - -static const struct pmu_probe_info armv7_pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - { /* sentinel value */ } -}; - - -static int armv7_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, - armv7_pmu_probe_table); -} - -static struct platform_driver armv7_pmu_driver = { - .driver = { - .name = "armv7-pmu", - .of_match_table = armv7_pmu_of_device_ids, - .suppress_bind_attrs = true, - }, - .probe = 
armv7_pmu_device_probe, -}; - -builtin_platform_driver(armv7_pmu_driver); -#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c deleted file mode 100644 index 7a2ba1c689a7..000000000000 --- a/arch/arm/kernel/perf_event_xscale.c +++ /dev/null @@ -1,748 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Copyright (C) 2010, ARM Ltd., Will Deacon - * - * Based on the previous xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -#ifdef CONFIG_CPU_XSCALE - -#include <asm/cputype.h> -#include <asm/irq_regs.h> - -#include <linux/of.h> -#include <linux/perf/arm_pmu.h> -#include <linux/platform_device.h> - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 0, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (XSCALE_CCNT_RESET | XSCALE_PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define
XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!event) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - irq_work_run(); - - /* - * Re-enable the PMU. 
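- * The overflow flags were already cleared by the pmnc write near the - * top of this handler, so re-enabling here cannot immediately re-raise - * the interrupt for the events we just handled.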
- */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void xscale1pmu_enable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); -} - -static void xscale1pmu_disable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); -} - -static int -xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - if (XSCALE_PERFCTR_CCNT == hwc->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) - return XSCALE_COUNTER1; - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) - return XSCALE_COUNTER0; - - return -EAGAIN; - } -} - -static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -static void xscale1pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); -} - -static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); -} - -static inline u64 xscale1pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - 
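-/* - * Map a generic perf event onto the hardware encodings in the tables - * above; the 0xFF mask passed here covers the 8-bit xscale event numbers. - */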
-static int xscale_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &xscale_perf_map, - &xscale_perf_cache_map, 0xFF); -} - -static int xscale1pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->name = "armv5_xscale1"; - cpu_pmu->handle_irq = xscale1pmu_handle_irq; - cpu_pmu->enable = xscale1pmu_enable_event; - cpu_pmu->disable = xscale1pmu_disable_event; - cpu_pmu->read_counter = xscale1pmu_read_counter; - cpu_pmu->write_counter = xscale1pmu_write_counter; - cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; - cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; - cpu_pmu->start = xscale1pmu_start; - cpu_pmu->stop = xscale1pmu_stop; - cpu_pmu->map_event = xscale_map_event; - cpu_pmu->num_events = 3; - - return 0; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; 
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!event) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void xscale2pmu_enable_event(struct perf_event *event) -{ - unsigned long ien, evtsel; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); -} - -static void xscale2pmu_disable_event(struct perf_event *event) -{ - unsigned long ien, evtsel, of_flags; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - of_flags = XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - of_flags = XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - of_flags = XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - of_flags = XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - of_flags = XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - 
xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - xscale2pmu_write_overflow_flags(of_flags); -} - -static int -xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void xscale2pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); -} - -static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); -} - -static inline u64 xscale2pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static int xscale2pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->name = "armv5_xscale2"; - cpu_pmu->handle_irq = xscale2pmu_handle_irq; - cpu_pmu->enable = xscale2pmu_enable_event; - cpu_pmu->disable = xscale2pmu_disable_event; - cpu_pmu->read_counter = xscale2pmu_read_counter; - cpu_pmu->write_counter = xscale2pmu_write_counter; - cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; - cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; - cpu_pmu->start = xscale2pmu_start; - cpu_pmu->stop = xscale2pmu_stop; - cpu_pmu->map_event = xscale_map_event; - cpu_pmu->num_events = 5; - - return 0; -} - -static const struct pmu_probe_info xscale_pmu_probe_table[] = { - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -static int xscale_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); -} - -static struct platform_driver xscale_pmu_driver = { - .driver = { - .name = "xscale-pmu", - }, - .probe = xscale_pmu_device_probe, -}; - -builtin_platform_driver(xscale_pmu_driver); -#endif /* CONFIG_CPU_XSCALE */ diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 7526a9e714fa..aa9530b4064f 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -56,6 +56,18 @@ config ARM_PMU Say y 
if you want to use CPU performance monitors on ARM-based systems. +config ARM_V6_PMU + depends on ARM_PMU && (CPU_V6 || CPU_V6K) + def_bool y + +config ARM_V7_PMU + depends on ARM_PMU && CPU_V7 + def_bool y + +config ARM_XSCALE_PMU + depends on ARM_PMU && CPU_XSCALE + def_bool y + config RISCV_PMU depends on RISCV bool "RISC-V PMU framework" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 29b1c28203ef..d43df81d52f7 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -6,6 +6,9 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o +obj-$(CONFIG_ARM_V6_PMU) += arm_v6_pmu.o +obj-$(CONFIG_ARM_V7_PMU) += arm_v7_pmu.o +obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c new file mode 100644 index 000000000000..f7593843bb85 --- /dev/null +++ b/drivers/perf/arm_v6_pmu.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has its + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt.
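+ * + * As a rough sketch (using the constants defined below), disabling counter + * N therefore means: program ARMV6_PERFCTR_NOP (the ETMEXTOUT[0] encoding, + * 0x20) as N's event, clear N's interrupt enable bit, then write 0 to the + * counter itself.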
+ */ + +#include <asm/cputype.h> +#include <asm/irq_regs.h> + +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 0, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + + /* + * The ARM performance counters can count micro DTLB misses, micro ITLB + * misses and main TLB misses. There isn't an event for TLB misses, so + * use the micro misses here and if users want the main TLB misses they + * can use a raw counter.
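+ * For example, assuming the standard perf raw-event syntax, the main + * TLB miss event (ARMV6_PERFCTR_MAIN_TLB_MISS, 0xF above) could be + * requested with something like "perf stat -e r0f".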
+ */ + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u64 armv6pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +static void armv6pmu_enable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + 
WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); +} + +static irqreturn_t +armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv6pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); +} + +static void armv6pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); +} + +static int +armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. 
*/ + return -EAGAIN; + } +} + +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +static void armv6pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); +} + +static int armv6_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv6_perf_map, + &armv6_perf_cache_map, 0xFF); +} + +static void armv6pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6_map_event; + cpu_pmu->num_events = 3; +} + +static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1136"; + return 0; +} + +static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1156"; + return 0; +} + +static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1176"; + return 0; +} + +static const struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; + +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); +} + +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +builtin_platform_driver(armv6_pmu_driver); diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c new file mode 100644 index 000000000000..fdd936fbd188 --- /dev/null +++ b/drivers/perf/arm_v7_pmu.c @@ -0,0 +1,2002 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. 
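+ * + * Event maps for the other implementations supported by this driver + * (Cortex-A5, A7, A12, A15, Krait and Scorpion) are defined further + * down in this file as well.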
+ * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/vfp.h> +#include "../vfp/vfpinstr.h" + +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +/* + * Common ARMv7 event types + * + * Note: An implementation may not be able to count all of these events + * but the encodings are considered to be `reserved' in the case that + * they are not available. + */ +#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 +#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 +#define ARMV7_PERFCTR_ITLB_REFILL 0x02 +#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 +#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 +#define ARMV7_PERFCTR_DTLB_REFILL 0x05 +#define ARMV7_PERFCTR_MEM_READ 0x06 +#define ARMV7_PERFCTR_MEM_WRITE 0x07 +#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 +#define ARMV7_PERFCTR_EXC_TAKEN 0x09 +#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A +#define ARMV7_PERFCTR_CID_WRITE 0x0B + +/* + * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all (taken) branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ +#define ARMV7_PERFCTR_PC_WRITE 0x0C +#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D +#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E +#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F +#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 +#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 +#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 + +/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ +#define ARMV7_PERFCTR_MEM_ACCESS 0x13 +#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 +#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 +#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 +#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 +#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV7_PERFCTR_BUS_ACCESS 0x19 +#define ARMV7_PERFCTR_MEM_ERROR 0x1A +#define ARMV7_PERFCTR_INSTR_SPEC 0x1B +#define ARMV7_PERFCTR_TTBR_WRITE 0x1C +#define ARMV7_PERFCTR_BUS_CYCLES 0x1D + +#define ARMV7_PERFCTR_CPU_CYCLES 0xFF + +/* ARMv7 Cortex-A8 specific event types */ +#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 +#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 +#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 +#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 + +/* ARMv7 Cortex-A9 specific event types */ +#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 +#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 +#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 + +/* ARMv7 Cortex-A5 specific event types */ +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 + +/* ARMv7 Cortex-A15 specific event types */ +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 + +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D + +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 + +#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 + +/* ARMv7 Cortex-A12 specific event types */ +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
+#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 + +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 + +#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 + +#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 + +/* ARMv7 Krait specific event types */ +#define KRAIT_PMRESR0_GROUP0 0xcc +#define KRAIT_PMRESR1_GROUP0 0xd0 +#define KRAIT_PMRESR2_GROUP0 0xd4 +#define KRAIT_VPMRESR0_GROUP0 0xd8 + +#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 +#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 + +#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 +#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 + +/* ARMv7 Scorpion specific event types */ +#define SCORPION_LPM0_GROUP0 0x4c +#define SCORPION_LPM1_GROUP0 0x50 +#define SCORPION_LPM2_GROUP0 0x54 +#define SCORPION_L2LPM_GROUP0 0x58 +#define SCORPION_VLPM_GROUP0 0x5c + +#define SCORPION_ICACHE_ACCESS 0x10053 +#define SCORPION_ICACHE_MISS 0x10052 + +#define SCORPION_DTLB_ACCESS 0x12013 +#define SCORPION_DTLB_MISS 0x12012 + +#define SCORPION_ITLB_MISS 0x12021 + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
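+ * For example, the C(L1D) OP_READ and OP_WRITE entries below both point + * at the same ARMV7_PERFCTR_L1_DCACHE_ACCESS/REFILL pair.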
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A5 HW events mapping + */ +static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + /* + * The prefetch counters don't differentiate between the I side and the + * D side. 
+ */ + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A15 HW events mapping + */ +static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. 
+ */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A7 HW events mapping + */ +static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Cortex-A12 HW events mapping + */ +static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. 
+ */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Krait HW events mapping + */ +static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. 
+ */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. 
+ */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *armv7_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group armv7_pmu_format_attr_group = { + .name = "format", + .attrs = armv7_pmu_format_attrs, +}; + +#define ARMV7_EVENT_ATTR_RESOLVE(m) #m +#define ARMV7_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ + "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) + +ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); +ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL); +ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); +ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); +ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); +ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); +ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); +ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); +ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); +ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); +ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); +ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); +ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); +ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); +ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); +ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); +ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); +ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); + +static struct attribute *armv7_pmuv1_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv1_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv1_event_attrs, +}; + +ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); +ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_cache_wb, 
ARMV7_PERFCTR_L1_DCACHE_WB); +ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); +ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); +ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); +ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); +ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); +ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); +ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); +ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); + +static struct attribute *armv7_pmuv2_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + &armv7_event_attr_mem_access.attr.attr, + &armv7_event_attr_l1i_cache.attr.attr, + &armv7_event_attr_l1d_cache_wb.attr.attr, + &armv7_event_attr_l2d_cache.attr.attr, + &armv7_event_attr_l2d_cache_refill.attr.attr, + &armv7_event_attr_l2d_cache_wb.attr.attr, + &armv7_event_attr_bus_access.attr.attr, + &armv7_event_attr_memory_error.attr.attr, + &armv7_event_attr_inst_spec.attr.attr, + &armv7_event_attr_ttbr_write_retired.attr.attr, + &armv7_event_attr_bus_cycles.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv2_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv2_event_attrs, +}; + +/* + * Perf Events' indices + */ +#define ARMV7_IDX_CYCLE_COUNTER 0 +#define ARMV7_IDX_COUNTER0 1 +#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +#define ARMV7_MAX_COUNTERS 32 +#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV7_IDX_TO_COUNTER(x) \ + (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK) + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ +#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv2 + */ +#define ARMV7_EXCLUDE_PL1 BIT(31) +#define ARMV7_EXCLUDE_USER BIT(30) +#define ARMV7_INCLUDE_HYP BIT(27) + 
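The counter-index and filter encodings above are easy to check in isolation. The sketch below mirrors the ARMV7_IDX_TO_COUNTER() and ARMV7_EXCLUDE_*/ARMV7_INCLUDE_* definitions in a stand-alone user-space program; the 0x13 event number is an arbitrary placeholder, and the whole thing is an illustration of the bit layout rather than driver code. armv7pmu_set_event_filter() further down performs the same composition when it builds config_base from a perf_event_attr.

/* Stand-alone illustration of the counter-index and filter bit layout. */
#include <stdio.h>

#define BIT(n)			(1U << (n))
#define ARMV7_IDX_CYCLE_COUNTER	0
#define ARMV7_IDX_COUNTER0	1
#define ARMV7_MAX_COUNTERS	32
#define ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
#define ARMV7_IDX_TO_COUNTER(x)	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
#define ARMV7_EXCLUDE_PL1	BIT(31)
#define ARMV7_EXCLUDE_USER	BIT(30)
#define ARMV7_INCLUDE_HYP	BIT(27)

int main(void)
{
	/* perf index 1 is the first event counter, i.e. hardware counter 0 */
	printf("idx 1 -> hw counter %d\n", ARMV7_IDX_TO_COUNTER(1));

	/* count example event 0x13 in user mode only: mask out PL1 */
	unsigned int evtsel = 0x13 | ARMV7_EXCLUDE_PL1;
	printf("evtsel = 0x%08x\n", evtsel);	/* 0x80000013 */
	return 0;
}

Note how the filter bits live at the top of the same word that carries the 8-bit event number, which is why ARMV7_EVTYPE_MASK above covers both ends of the register.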
+/* + * Secure debug enable reg + */ +#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ + +static inline u32 armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(u32 val) +{ + val &= ARMV7_PMNC_MASK; + isb(); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(u32 pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) +{ + return idx >= ARMV7_IDX_CYCLE_COUNTER && + idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); +} + +static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) +{ + return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); +} + +static inline void armv7_pmnc_select_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); + isb(); +} + +static inline u64 armv7pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u32 value = 0; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + } else { + armv7_pmnc_select_counter(idx); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); + } + + return value; +} + +static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); + } else { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); + } +} + +static inline void armv7_pmnc_write_evtsel(int idx, u32 val) +{ + armv7_pmnc_select_counter(idx); + val &= ARMV7_EVTYPE_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); +} + +static inline void armv7_pmnc_enable_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); +} + +static inline void armv7_pmnc_disable_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); +} + +static inline void armv7_pmnc_enable_intens(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); +} + +static inline void armv7_pmnc_disable_intens(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); + isb(); + /* Clear the overflow flag in case an interrupt is pending. 
*/ + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); + isb(); +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) +{ + u32 val; + unsigned int cnt; + + pr_info("PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + pr_info("PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + pr_info("CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + pr_info("INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + pr_info("FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + pr_info("SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + pr_info("CCNT =0x%08x\n", val); + + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + pr_info("CNT[%d] count =0x%08x\n", + ARMV7_IDX_TO_COUNTER(cnt), val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + pr_info("CNT[%d] evtsel=0x%08x\n", + ARMV7_IDX_TO_COUNTER(cnt), val); + } +} +#endif + +static void armv7pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We only need to set the event for the cycle counter if we + * have the ability to perform event filtering. + */ + if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); +} + +static void armv7pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Disable counter and interrupt + */ + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); +} + +static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + u32 pmnc; + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? 
+ */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(struct arm_pmu *cpu_pmu) +{ + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); +} + +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) +{ + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); +} + +static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; + + /* Always place a cycle counter into the cycle counter. */ + if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_IDX_CYCLE_COUNTER; + } + + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; +} + +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +/* + * Add an event filter to a given event. This will only work for PMUv2 PMUs. + */ +static int armv7pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EOPNOTSUPP; + } + if (attr->exclude_user) + config_base |= ARMV7_EXCLUDE_USER; + if (attr->exclude_kernel) + config_base |= ARMV7_EXCLUDE_PL1; + if (!attr->exclude_hv) + config_base |= ARMV7_INCLUDE_HYP; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv7pmu_reset(void *info) +{ + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + u32 idx, nb_cnt = cpu_pmu->num_events, val; + + if (cpu_pmu->secure_access) { + asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); + val |= ARMV7_SDER_SUNIDEN; + asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); + } + + /* The counter and interrupt enable registers are unknown at reset. 
*/ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); +} + +static int armv7_a8_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a8_perf_map, + &armv7_a8_perf_cache_map, 0xFF); +} + +static int armv7_a9_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a9_perf_map, + &armv7_a9_perf_cache_map, 0xFF); +} + +static int armv7_a5_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a5_perf_map, + &armv7_a5_perf_cache_map, 0xFF); +} + +static int armv7_a15_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a15_perf_map, + &armv7_a15_perf_cache_map, 0xFF); +} + +static int armv7_a7_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a7_perf_map, + &armv7_a7_perf_cache_map, 0xFF); +} + +static int armv7_a12_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a12_perf_map, + &armv7_a12_perf_cache_map, 0xFF); +} + +static int krait_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map, + &krait_perf_cache_map, 0xFFFFF); +} + +static int krait_map_event_no_branch(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map_no_branch, + &krait_perf_cache_map, 0xFFFFF); +} + +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + +static void armv7pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv7pmu_handle_irq; + cpu_pmu->enable = armv7pmu_enable_event; + cpu_pmu->disable = armv7pmu_disable_event; + cpu_pmu->read_counter = armv7pmu_read_counter; + cpu_pmu->write_counter = armv7pmu_write_counter; + cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; + cpu_pmu->start = armv7pmu_start; + cpu_pmu->stop = armv7pmu_stop; + cpu_pmu->reset = armv7pmu_reset; +}; + +static void armv7_read_num_pmnc_events(void *info) +{ + int *nb_cnt = info; + + /* Read the nb of CNTx counters supported from PMNC */ + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} + +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); +} + +static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a8"; + cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a9"; + cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a5"; + cpu_pmu->map_event = armv7_a5_map_event; + 
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a15"; + cpu_pmu->map_event = armv7_a15_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a7"; + cpu_pmu->map_event = armv7_a7_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a12"; + cpu_pmu->map_event = armv7_a12_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return armv7_probe_num_events(cpu_pmu); +} + +static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv7_a12_pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_cortex_a17"; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; + return ret; +} + +/* + * Krait Performance Monitor Region Event Selection Register (PMRESRn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). 
+ */ + +#define KRAIT_EVENT (1 << 16) +#define VENUM_EVENT (2 << 16) +#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) +#define PMRESRn_EN BIT(31) + +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + +static u32 krait_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } + + return val; +} + +static void krait_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } +} + +static u32 venum_read_pmresr(void) +{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void venum_write_pmresr(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) +{ + u32 venum_new_val; + u32 fp_new_val; + + BUG_ON(preemptible()); + /* CPACR Enable CP10 and CP11 access */ + *venum_orig_val = get_copro_access(); + venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); + set_copro_access(venum_new_val); + + /* Enable FPEXC */ + *fp_orig_val = fmrx(FPEXC); + fp_new_val = *fp_orig_val | FPEXC_EN; + fmxr(FPEXC, fp_new_val); +} + +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) +{ + BUG_ON(preemptible()); + /* Restore FPEXC */ + fmxr(FPEXC, fp_orig_val); + isb(); + /* Restore CPACR */ + set_copro_access(venum_orig_val); +} + +static u32 krait_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, + KRAIT_PMRESR1_GROUP0, + KRAIT_PMRESR2_GROUP0 }; + return pmresrn_table[region]; +} + +static void krait_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = KRAIT_VPMRESR0_GROUP0; + else + val = krait_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = krait_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_pmresrn(region, val); + } +} + +static u32 clear_pmresrn_group(u32 val, int group) +{ + u32 mask; + int group_shift; + + group_shift = group * 8; + mask = 0xff << group_shift; 
+ val &= ~mask; + + /* Don't clear enable bit if entire region isn't disabled */ + if (val & ~PMRESRn_EN) + return val |= PMRESRn_EN; + + return 0; +} + +static void krait_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = krait_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + krait_write_pmresrn(region, val); + } +} + +static void krait_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* Disable counter and interrupt */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); +} + +static void krait_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We set the event for the cycle counter because we + * have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_evt_setup(idx, hwc->config_base); + else + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); +} + +static void krait_pmu_reset(void *info) +{ + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + krait_write_pmresrn(0, 0); + krait_write_pmresrn(1, 0); + krait_write_pmresrn(2, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + +} + +static int krait_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = KRAIT_VPMRESR0_GROUP0; + else + bit = krait_get_pmresrn_event(region); + bit -= krait_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. 
+ */ +static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); + + if (venum_event || krait_event) { + /* Ignore invalid events */ + if (group > 3 || region > 2) + return -EINVAL; + if (venum_event && (code & 0xe0)) + return -EINVAL; + + bit = krait_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); + + armv7pmu_clear_event_idx(cpuc, event); + if (venum_event || krait_event) { + bit = krait_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_krait"; + /* Some early versions of Krait don't support PC write events */ + if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, + "qcom,no-pc-write")) + cpu_pmu->map_event = krait_map_event_no_branch; + else + cpu_pmu->map_event = krait_map_event; + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->reset = krait_pmu_reset; + cpu_pmu->enable = krait_pmu_enable_event; + cpu_pmu->disable = krait_pmu_disable_event; + cpu_pmu->get_event_idx = krait_pmu_get_event_idx; + cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). 
+ */ + +static u32 scorpion_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + break; + case 3: + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } + + return val; +} + +static void scorpion_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); + break; + case 3: + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } +} + +static u32 scorpion_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, + SCORPION_LPM1_GROUP0, + SCORPION_LPM2_GROUP0, + SCORPION_L2LPM_GROUP0 }; + return pmresrn_table[region]; +} + +static void scorpion_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = SCORPION_VLPM_GROUP0; + else + val = scorpion_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* Disable counter and interrupt */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); +} + +static void scorpion_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Enable counter and interrupt, and set the counter to count + * 
the event that we're interested in. + */ + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't set the event for the cycle counter because we + * don't have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_evt_setup(idx, hwc->config_base); + else if (idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); +} + +static void scorpion_pmu_reset(void *info) +{ + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + scorpion_write_pmresrn(0, 0); + scorpion_write_pmresrn(1, 0); + scorpion_write_pmresrn(2, 0); + scorpion_write_pmresrn(3, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } +} + +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = SCORPION_VLPM_GROUP0; + else + bit = scorpion_get_pmresrn_event(region); + bit -= scorpion_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. 
+ */ +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + /* Ignore invalid events */ + if (group > 3 || region > 3) + return -EINVAL; + + bit = scorpion_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + armv7pmu_clear_event_idx(cpuc, event); + if (venum_event || scorpion_event) { + bit = scorpion_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion_mp"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; + +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; + + +static int armv7_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); +} + +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + .suppress_bind_attrs = true, + }, + .probe = 
armv7_pmu_device_probe, +}; + +builtin_platform_driver(armv7_pmu_driver); diff --git a/drivers/perf/arm_xscale_pmu.c b/drivers/perf/arm_xscale_pmu.c new file mode 100644 index 000000000000..3d8b72d6b37f --- /dev/null +++ b/drivers/perf/arm_xscale_pmu.c @@ -0,0 +1,745 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#include +#include + +#include +#include +#include + +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 0, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define 
XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!event) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + irq_work_run(); + + /* + * Re-enable the PMU. 
+ */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void xscale1pmu_enable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); +} + +static void xscale1pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, evt; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); +} + +static int +xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + if (XSCALE_PERFCTR_CCNT == hwc->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +static void xscale1pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); +} + +static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); +} + +static inline u64 xscale1pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + 
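xscale1 keeps both event selectors, the per-counter interrupt enables and the global enable in the single PMNC word, so programming a counter is a read-modify-write of that register, as xscale1pmu_enable_event() above shows. The following stand-alone sketch replays that sequence for XSCALE_COUNTER0 with a made-up starting value; it illustrates the bit layout only and is not driver code.

/* Stand-alone sketch of the xscale1 PMNC read-modify-write above. */
#include <stdio.h>

#define XSCALE_PMU_ENABLE	0x001
#define XSCALE1_COUNT0_INT_EN	0x010
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)

int main(void)
{
	unsigned long pmnc = 0x0;	/* stand-in for xscale1pmu_read_pmnc() */
	unsigned long event = 0x07;	/* XSCALE_PERFCTR_INSTRUCTION */

	/* clear counter 0's old event, then install the new one + its IRQ */
	pmnc &= ~XSCALE1_COUNT0_EVT_MASK;
	pmnc |= (event << XSCALE1_COUNT0_EVT_SHFT) | XSCALE1_COUNT0_INT_EN;

	/* the driver sets this bit globally, in xscale1pmu_start() */
	pmnc |= XSCALE_PMU_ENABLE;

	printf("PMNC = 0x%08lx\n", pmnc);	/* expect 0x00007011 */
	return 0;
}

This single-register design is also why xscale1pmu_disable_event() parks an unused slot on XSCALE_PERFCTR_UNUSED rather than leaving a stale event selected.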
+static int xscale_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &xscale_perf_map, + &xscale_perf_cache_map, 0xFF); +} + +static int xscale1pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "armv5_xscale1"; + cpu_pmu->handle_irq = xscale1pmu_handle_irq; + cpu_pmu->enable = xscale1pmu_enable_event; + cpu_pmu->disable = xscale1pmu_disable_event; + cpu_pmu->read_counter = xscale1pmu_read_counter; + cpu_pmu->write_counter = xscale1pmu_write_counter; + cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; + cpu_pmu->start = xscale1pmu_start; + cpu_pmu->stop = xscale1pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 3; + + return 0; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; 
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!event) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void xscale2pmu_enable_event(struct perf_event *event) +{ + unsigned long ien, evtsel; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); +} + +static void xscale2pmu_disable_event(struct perf_event *event) +{ + unsigned long ien, evtsel, of_flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + of_flags = XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + of_flags = XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + of_flags = XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + of_flags = XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + of_flags = XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + 
xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + xscale2pmu_write_overflow_flags(of_flags); +} + +static int +xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void xscale2pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); +} + +static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long val; + + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); +} + +static inline u64 xscale2pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static int xscale2pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "armv5_xscale2"; + cpu_pmu->handle_irq = xscale2pmu_handle_irq; + cpu_pmu->enable = xscale2pmu_enable_event; + cpu_pmu->disable = xscale2pmu_disable_event; + cpu_pmu->read_counter = xscale2pmu_read_counter; + cpu_pmu->write_counter = xscale2pmu_write_counter; + cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; + cpu_pmu->start = xscale2pmu_start; + cpu_pmu->stop = xscale2pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 5; + + return 0; +} + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); +} + +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +builtin_platform_driver(xscale_pmu_driver); -- cgit v1.2.3 From 12f051c987dc91d3bf7c4dbb740321f8b24070b4 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:28 -0600 Subject: perf: arm_v6/7_pmu: Drop non-DT probe support There are no non-DT 
based PMU users for v6 or v7, so drop the custom non-DT probe table. Unfortunately XScale still needs non-DT probing. Note that this drops support for the arm1156 PMU, but there are no arm1156 based systems supported in the kernel.

Acked-by: Mark Rutland
Signed-off-by: Rob Herring (Arm)
Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-4-c9784b4f4065@kernel.org
Signed-off-by: Will Deacon
---
 drivers/perf/arm_v6_pmu.c | 17 +----------------
 drivers/perf/arm_v7_pmu.c | 10 +---------
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/perf/arm_v6_pmu.c b/drivers/perf/arm_v6_pmu.c
index f7593843bb85..0bb685b4bac5 100644
--- a/drivers/perf/arm_v6_pmu.c
+++ b/drivers/perf/arm_v6_pmu.c
@@ -401,13 +401,6 @@ static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
 	return 0;
 }
 
-static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	armv6pmu_init(cpu_pmu);
-	cpu_pmu->name = "armv6_1156";
-	return 0;
-}
-
 static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv6pmu_init(cpu_pmu);
@@ -421,17 +414,9 @@ static const struct of_device_id armv6_pmu_of_device_ids[] = {
 	{ /* sentinel value */ }
 };
 
-static const struct pmu_probe_info armv6_pmu_probe_table[] = {
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
-	{ /* sentinel value */ }
-};
-
 static int armv6_pmu_device_probe(struct platform_device *pdev)
 {
-	return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids,
-				    armv6_pmu_probe_table);
+	return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
 }
 
 static struct platform_driver armv6_pmu_driver = {
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
index fdd936fbd188..928ac3d626ed 100644
--- a/drivers/perf/arm_v7_pmu.c
+++ b/drivers/perf/arm_v7_pmu.c
@@ -1977,17 +1977,9 @@ static const struct of_device_id armv7_pmu_of_device_ids[] = {
 	{},
 };
 
-static const struct pmu_probe_info armv7_pmu_probe_table[] = {
-	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
-	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
-	{ /* sentinel value */ }
-};
-
-
 static int armv7_pmu_device_probe(struct platform_device *pdev)
 {
-	return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids,
-				    armv7_pmu_probe_table);
+	return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, NULL);
 }
 
 static struct platform_driver armv7_pmu_driver = {
-- cgit v1.2.3

From d688ffa269421cec73c7e13fd0cb03879b07db89 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)"
Date: Wed, 26 Jun 2024 16:32:29 -0600
Subject: perf: arm_pmuv3: Include asm/arm_pmuv3.h from linux/perf/arm_pmuv3.h

The arm64 asm/arm_pmuv3.h depends on defines from linux/perf/arm_pmuv3.h. Rather than depend on include order, follow the usual pattern of "linux" headers including "asm" headers of the same name.

With this change, the include of linux/kvm_host.h is problematic due to circular includes:

  In file included from ../arch/arm64/include/asm/arm_pmuv3.h:9,
                   from ../include/linux/perf/arm_pmuv3.h:312,
                   from ../include/kvm/arm_pmu.h:11,
                   from ../arch/arm64/include/asm/kvm_host.h:38,
                   from ../arch/arm64/mm/init.c:41:
  ../include/linux/kvm_host.h:383:30: error: field 'arch' has incomplete type

Switching to asm/kvm_host.h solves the issue.
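To illustrate the include pattern described above, here is a minimal sketch of a "linux" header pulling in its same-named "asm" counterpart; the file and macro names are hypothetical, not taken from the patch:

/* include/linux/foo.h - hypothetical names, for illustration only */
#ifndef _LINUX_FOO_H
#define _LINUX_FOO_H

/* Generic definitions come first ... */
#define FOO_MAX_COUNTERS	32

/* ... then the arch header, which may rely on the defines above. */
#include <asm/foo.h>

#endif

Placing the include at the end of the generic header, as the last hunk below does for linux/perf/arm_pmuv3.h, removes any dependence on include order in .c files.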
Signed-off-by: Rob Herring (Arm)
Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-5-c9784b4f4065@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/arm_pmuv3.h | 2 +-
 arch/arm64/kvm/pmu-emul.c          | 1 -
 drivers/perf/arm_pmuv3.c           | 2 --
 include/linux/perf/arm_pmuv3.h     | 2 ++
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index c27404fa4418..a4697a0b6835 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -6,7 +6,7 @@
 #ifndef __ASM_PMUV3_H
 #define __ASM_PMUV3_H
 
-#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
 
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index a35ce10e0a9f..d1a476b08f54 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include <asm/arm_pmuv3.h>
 
 #define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index d6248493b5c6..cf0430c266a6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -25,8 +25,6 @@
 #include
 #include
 
-#include <asm/arm_pmuv3.h>
-
 /* ARMv8 Cortex-A53 specific event types. */
 #define ARMV8_A53_PERFCTR_PREF_LINEFILL		0xC2
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index 46377e134d67..7867db04ec98 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -309,4 +309,6 @@
 	}						\
 } while (0)
 
+#include <asm/arm_pmuv3.h>
+
 #endif
-- cgit v1.2.3

From 42b9fed388a510cdb17d25c0dc1c8b72c4828cea Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 13 Jun 2024 15:57:09 +0530
Subject: KVM: arm64: Replace custom macros with fields from ID_AA64PFR0_EL1

This replaces custom macro usage (i.e. ID_AA64PFR0_EL1_ELx_64BIT_ONLY and ID_AA64PFR0_EL1_ELx_32BIT_64BIT) and instead directly uses register fields from the ID_AA64PFR0_EL1 sysreg definition.
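As a rough sketch of what the SYS_FIELD_PREP_ENUM() replacements below boil down to, the snippet packs an enum value into a register field with FIELD_PREP(); the mask and value here are illustrative stand-ins, not the generated sysreg definitions:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Hypothetical 4-bit field at bits [3:0] and its "implemented" value. */
#define EXAMPLE_PFR0_EL0	GENMASK_ULL(3, 0)
#define EXAMPLE_PFR0_EL0_IMP	0x1

static inline u64 example_pack_el0_field(void)
{
	/* Shift the value into position under the field's mask. */
	return FIELD_PREP(EXAMPLE_PFR0_EL0, EXAMPLE_PFR0_EL0_IMP);
}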
Cc: Marc Zyngier
Cc: Oliver Upton
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: kvmarm@lists.linux.dev
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Acked-by: Marc Zyngier
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20240613102710.3295108-2-anshuman.khandual@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 10 +++++-----
 arch/arm64/kvm/hyp/nvhe/pkvm.c                 |  4 ++--
 arch/arm64/kvm/hyp/nvhe/sys_regs.c             |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 51f043649146..f957890c7e38 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -52,11 +52,11 @@
  * Supported by KVM
  */
 #define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
+	SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \
+	SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \
+	SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \
+	SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \
+	SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \
 	)
 
 /*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 95cf18574251..187a5f4d56c0 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -33,9 +33,9 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
 
 	/* Protected KVM does not support AArch32 guests. */
 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
-		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
-		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);
 
 	/*
 	 * Linux guests assume support for floating-point and Advanced SIMD. Do
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index edd969a1f36b..2860548d4250 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -276,7 +276,7 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
 	 * of AArch32 feature id registers.
 	 */
 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
-		     PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+		     PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP);
 
 	return pvm_access_raz_wi(vcpu, p, r);
 }
-- cgit v1.2.3

From 056600ff73307cadb89a07d068c9348b17eb530a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 13 Jun 2024 15:57:10 +0530
Subject: arm64/cpufeature: Replace custom macros with fields from ID_AA64PFR0_EL1

This replaces custom macro usage (i.e. ID_AA64PFR0_EL1_ELx_64BIT_ONLY and ID_AA64PFR0_EL1_ELx_32BIT_64BIT) and instead directly uses register fields from the ID_AA64PFR0_EL1 sysreg definition. Finally, drop both of these custom macros as they are now redundant.
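The cpufeature helpers touched by the next patch go in the opposite direction: a 4-bit unsigned field is extracted from the register value and compared against an enum value. A standalone sketch with made-up constants:

#include <stdint.h>

#define EXAMPLE_EL0_SHIFT	0	/* assume the field sits at bits [3:0] */
#define EXAMPLE_EL0_AARCH32	0x2	/* "EL0 supports AArch64 and AArch32" */

static inline int example_el0_is_32bit(uint64_t pfr0)
{
	uint32_t val = (pfr0 >> EXAMPLE_EL0_SHIFT) & 0xf;

	return val == EXAMPLE_EL0_AARCH32;
}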
Cc: Will Deacon
Cc: Mark Rutland
Cc: Mark Brown
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20240613102710.3295108-3-anshuman.khandual@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/cpufeature.h | 4 ++--
 arch/arm64/include/asm/sysreg.h     | 4 ----
 arch/arm64/kernel/cpufeature.c      | 4 ++--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b904a757bd3..558434267271 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -588,14 +588,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
 
-	return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
+	return val == ID_AA64PFR0_EL1_EL1_AARCH32;
 }
 
 static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
 
-	return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
+	return val == ID_AA64PFR0_EL1_EL0_AARCH32;
 }
 
 static inline bool id_aa64pfr0_sve(u64 pfr0)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index af3b206fa423..1b6e436dbb55 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -872,10 +872,6 @@
 /* Position the attr at the correct index */
 #define MAIR_ATTRIDX(attr, idx)		((attr) << ((idx) * 8))
 
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY	0x1
-#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT	0x2
-
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN	0x0
 #define ID_AA64MMFR0_EL1_TGRAN4_LPA2		ID_AA64MMFR0_EL1_TGRAN4_52_BIT
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c3913c90797e..646ecd3069fd 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -285,8 +285,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP),
 	ARM64_FTR_END,
 };
-- cgit v1.2.3

From 5e30c16b58a486ff8700512e43eac9949c32621b Mon Sep 17 00:00:00 2001
From: Kevin Brodsky
Date: Tue, 2 Jul 2024 10:13:49 +0100
Subject: Documentation: arm64: Update memory.rst for TBI

Most of memory.rst was written very early, at a time when TBI (Top Byte Ignore) was not enabled. Nowadays TBI0 is always enabled, and TBI1 may be enabled, depending on the kernel configuration. This means that VA bits 63:56 cannot generally be assumed to have any particular value. Regardless of TBI, TTBRx selection is done based on bit 55; update memory.rst accordingly.
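The rule the updated document states can be expressed as a one-liner; this illustrative helper (not kernel code) assumes only that bit 55 selects the translation table base:

#include <stdint.h>

/* With TBI, bits 63:56 may carry a tag, so only bit 55 decides the TTBR. */
static inline int va_selects_ttbr1(uint64_t va)
{
	return (va >> 55) & 1;	/* 1: TTBR1 (kernel pgd), 0: TTBR0 (user pgd) */
}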
Signed-off-by: Kevin Brodsky Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240702091349.356008-1-kevin.brodsky@arm.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/memory.rst | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/Documentation/arch/arm64/memory.rst b/Documentation/arch/arm64/memory.rst index 55a55f30eed8..8a658984b8bb 100644 --- a/Documentation/arch/arm64/memory.rst +++ b/Documentation/arch/arm64/memory.rst @@ -18,12 +18,10 @@ ARMv8.2 adds optional support for Large Virtual Address space. This is only available when running with a 64KB page size and expands the number of descriptors in the first level of translation. -User addresses have bits 63:48 set to 0 while the kernel addresses have -the same bits set to 1. TTBRx selection is given by bit 63 of the -virtual address. The swapper_pg_dir contains only kernel (global) -mappings while the user pgd contains only user (non-global) mappings. -The swapper_pg_dir address is written to TTBR1 and never written to -TTBR0. +TTBRx selection is given by bit 55 of the virtual address. The +swapper_pg_dir contains only kernel (global) mappings while the user pgd +contains only user (non-global) mappings. The swapper_pg_dir address is +written to TTBR1 and never written to TTBR0. AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: @@ -65,14 +63,14 @@ Translation table lookup with 4KB pages:: +--------+--------+--------+--------+--------+--------+--------+--------+ |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| +--------+--------+--------+--------+--------+--------+--------+--------+ - | | | | | | - | | | | | v - | | | | | [11:0] in-page offset - | | | | +-> [20:12] L3 index - | | | +-----------> [29:21] L2 index - | | +---------------------> [38:30] L1 index - | +-------------------------------> [47:39] L0 index - +-------------------------------------------------> [63] TTBR0/1 + | | | | | | + | | | | | v + | | | | | [11:0] in-page offset + | | | | +-> [20:12] L3 index + | | | +-----------> [29:21] L2 index + | | +---------------------> [38:30] L1 index + | +-------------------------------> [47:39] L0 index + +----------------------------------------> [55] TTBR0/1 Translation table lookup with 64KB pages:: @@ -80,14 +78,14 @@ Translation table lookup with 64KB pages:: +--------+--------+--------+--------+--------+--------+--------+--------+ |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| +--------+--------+--------+--------+--------+--------+--------+--------+ - | | | | | - | | | | v - | | | | [15:0] in-page offset - | | | +----------> [28:16] L3 index - | | +--------------------------> [41:29] L2 index - | +-------------------------------> [47:42] L1 index (48-bit) - | [51:42] L1 index (52-bit) - +-------------------------------------------------> [63] TTBR0/1 + | | | | | + | | | | v + | | | | [15:0] in-page offset + | | | +----------> [28:16] L3 index + | | +--------------------------> [41:29] L2 index + | +-------------------------------> [47:42] L1 index (48-bit) + | [51:42] L1 index (52-bit) + +----------------------------------------> [55] TTBR0/1 When using KVM without the Virtualization Host Extensions, the -- cgit v1.2.3 From f5a4af3c752704272e9094466e66e21d4ee4e414 Mon Sep 17 00:00:00 2001 From: Liu Wei Date: Tue, 25 Jun 2024 11:05:04 +0800 Subject: ACPI: Add acpi=nospcr to disable ACPI SPCR as default console on ARM64 For varying privacy and security reasons, sometimes we would like to completely silence the _serial_ console, and only 
enable it when needed. But there are many existing systems that depend on this _serial_ console, so add acpi=nospcr to disable console in ACPI SPCR table as default _serial_ console. Signed-off-by: Liu Wei Suggested-by: Prarit Bhargava Suggested-by: Will Deacon Suggested-by: Andrew Lunn Reviewed-by: Hanjun Guo Reviewed-by: Prarit Bhargava Link: https://lore.kernel.org/r/20240625030504.58025-1-liuwei09@cestc.cn Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 10 +++++++--- arch/arm64/kernel/acpi.c | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..68d8acc38c82 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -12,7 +12,7 @@ acpi= [HW,ACPI,X86,ARM64,RISCV64,EARLY] Advanced Configuration and Power Interface Format: { force | on | off | strict | noirq | rsdt | - copy_dsdt } + copy_dsdt | nospcr } force -- enable ACPI if default was off on -- enable ACPI but allow fallback to DT [arm64,riscv64] off -- disable ACPI if default was on @@ -21,8 +21,12 @@ strictly ACPI specification compliant. rsdt -- prefer RSDT over (default) XSDT copy_dsdt -- copy DSDT to memory - For ARM64 and RISCV64, ONLY "acpi=off", "acpi=on" or - "acpi=force" are available + nospcr -- disable console in ACPI SPCR table as + default _serial_ console on ARM64 + For ARM64, ONLY "acpi=off", "acpi=on", "acpi=force" or + "acpi=nospcr" are available + For RISCV64, ONLY "acpi=off", "acpi=on" or "acpi=force" + are available See also Documentation/power/runtime_pm.rst, pci=noacpi diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index fd1917336d3a..a73b87ac815c 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -45,6 +45,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; +static bool param_acpi_nospcr __initdata; static int __init parse_acpi(char *arg) { @@ -58,6 +59,8 @@ static int __init parse_acpi(char *arg) param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = true; + else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */ + param_acpi_nospcr = true; else return -EINVAL; /* Core will print when we return error */ @@ -237,7 +240,20 @@ done: acpi_put_table(facs); } #endif - acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + + /* + * For varying privacy and security reasons, sometimes need + * to completely silence the serial console output, and only + * enable it when needed. + * But there are many existing systems that depend on this + * behaviour, use acpi=nospcr to disable console in ACPI SPCR + * table as default serial console. + */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, + !param_acpi_nospcr); + pr_info("Use ACPI SPCR as default console: %s\n", + param_acpi_nospcr ? "No" : "Yes"); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); } -- cgit v1.2.3 From 080402007007ca1bed8bcb103625137a5c8446c6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Jul 2024 12:54:29 +0100 Subject: irqchip/gic-v3: Fix 'broken_rdists' unused warning when !SMP and !ACPI Compiling the GICv3 driver on arm32 with CONFIG_SMP disabled (CONFIG_ACPI is not available) generates an unused variable warning for 'broken_rdists'. 
Add a __maybe_unused attribute to silence the compiler.

Fixes: d633da5d3ab1 ("irqchip/gic-v3: Add support for ACPI's disabled but 'online capable' CPUs")
Cc: stable@vger.kernel.org # .x
Acked-by: Marc Zyngier
Signed-off-by: Catalin Marinas
---
 drivers/irqchip/irq-gic-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index cc81515c1413..18619d29b2ed 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -44,7 +44,7 @@
 
 #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
 
-static struct cpumask broken_rdists __read_mostly;
+static struct cpumask broken_rdists __read_mostly __maybe_unused;
 
 struct redist_region {
 	void __iomem		*redist_base;
-- cgit v1.2.3

From 916b93f4e865b35563902f5862b443fc122631b4 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan
Date: Thu, 20 Jun 2024 14:36:00 +0800
Subject: arm64: smp: Fix missing IPI statistics

Commit 83cfac95c018 ("genirq: Allow interrupts to be excluded from /proc/interrupts") avoids IPIs appearing twice in /proc/interrupts. But commit 331a1b3a836c ("arm64: smp: Add arch support for backtrace using pseudo-NMI") and commit 2f5cd0c7ffde ("arm64: kgdb: Implement kgdb_roundup_cpus() to enable pseudo-NMI roundup") set the "IRQ_HIDDEN" flag on the CPU_BACKTRACE and KGDB_ROUNDUP IPIs without showing them in arch_show_interrupts(), so the kstat_irqs accounting for these interrupts is missing from the display.

Before this patch, the CPU_BACKTRACE and KGDB_ROUNDUP IPIs are missing:

/ # cat /proc/interrupts
          CPU0       CPU1       CPU2       CPU3
 11:       466        600        309        332     GICv3  27 Level     arch_timer
 13:        24          0          0          0     GICv3  33 Level     uart-pl011
 15:        64          0          0          0     GICv3  78 Edge      virtio0
 16:         0          0          0          0     GICv3  79 Edge      virtio1
 17:         0          0          0          0     GICv3  34 Level     rtc-pl031
 18:         3          3          3          3     GICv3  23 Level     arm-pmu
 19:         0          0          0          0  9030000.pl061   3 Edge      GPIO Key Poweroff
IPI0:        7         14          9         26      Rescheduling interrupts
IPI1:      354         93        233        255      Function call interrupts
IPI2:        0          0          0          0      CPU stop interrupts
IPI3:        0          0          0          0      CPU stop (for crash dump) interrupts
IPI4:        0          0          0          0      Timer broadcast interrupts
IPI5:        1          0          0          0      IRQ work interrupts
Err:         0

After this patch, the CPU_BACKTRACE and KGDB_ROUNDUP IPIs are displayed:

/ # cat /proc/interrupts
          CPU0       CPU1       CPU2       CPU3
 11:       393        281        532        449     GICv3  27 Level     arch_timer
 13:        15          0          0          0     GICv3  33 Level     uart-pl011
 15:        64          0          0          0     GICv3  78 Edge      virtio0
 16:         0          0          0          0     GICv3  79 Edge      virtio1
 17:         0          0          0          0     GICv3  34 Level     rtc-pl031
 18:         2          2          2          2     GICv3  23 Level     arm-pmu
 19:         0          0          0          0  9030000.pl061   3 Edge      GPIO Key Poweroff
IPI0:       11         19          4         23      Rescheduling interrupts
IPI1:      279        347        222         72      Function call interrupts
IPI2:        0          0          0          0      CPU stop interrupts
IPI3:        0          0          0          0      CPU stop (for crash dump) interrupts
IPI4:        0          0          0          0      Timer broadcast interrupts
IPI5:        1          0          0          1      IRQ work interrupts
IPI6:        0          0          0          0      CPU backtrace interrupts
IPI7:        0          0          0          0      KGDB roundup interrupts
Err:         0

Fixes: 331a1b3a836c ("arm64: smp: Add arch support for backtrace using pseudo-NMI")
Signed-off-by: Jinjie Ruan
Suggested-by: Doug Anderson
Acked-by: Will Deacon
Reviewed-by: Douglas Anderson
Link: https://lore.kernel.org/r/20240620063600.573559-1-ruanjinjie@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/smp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 753360ce72f6..ad9c719bd078 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -762,13 +762,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	}
 }
 
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
 	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
= "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_TIMER] = "Timer broadcast interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); @@ -779,7 +781,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) { unsigned int cpu, i; - for (i = 0; i < NR_IPI; i++) { + for (i = 0; i < MAX_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) -- cgit v1.2.3 From 42bebc7cca79d545f4eb9ec131560cfdd07762e0 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 9 Jul 2024 15:07:55 -0700 Subject: perf: add missing MODULE_DESCRIPTION() macros With ARCH=x86, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/arm-ccn.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/fsl_imx8_ddr_perf.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/marvell_cn10k_ddr_pmu.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/arm_cspmu/arm_cspmu_module.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/arm_cspmu/nvidia_cspmu.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/arm_cspmu/ampere_cspmu.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/perf/cxl_pmu.o Add the missing invocation of the MODULE_DESCRIPTION() macro to all files which have a MODULE_LICENSE(). This includes drivers/perf/hisilicon/hisi_uncore_pmu.c which, although it did not produce a warning with the x86 allmodconfig configuration, may cause this warning with arm64 configurations. 
Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240709-md-drivers-perf-v3-1-513275b75ed0@quicinc.com Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 1 + drivers/perf/arm_cspmu/ampere_cspmu.c | 1 + drivers/perf/arm_cspmu/arm_cspmu.c | 1 + drivers/perf/arm_cspmu/nvidia_cspmu.c | 1 + drivers/perf/cxl_pmu.c | 1 + drivers/perf/fsl_imx8_ddr_perf.c | 1 + drivers/perf/hisilicon/hisi_uncore_pmu.c | 1 + drivers/perf/marvell_cn10k_ddr_pmu.c | 1 + 8 files changed, 8 insertions(+) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 86ef31ac7503..5c66b9278862 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1561,4 +1561,5 @@ module_init(arm_ccn_init); module_exit(arm_ccn_exit); MODULE_AUTHOR("Pawel Moll "); +MODULE_DESCRIPTION("ARM CCN (Cache Coherent Network) Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c index f146a455e838..f72f5689923c 100644 --- a/drivers/perf/arm_cspmu/ampere_cspmu.c +++ b/drivers/perf/arm_cspmu/ampere_cspmu.c @@ -269,4 +269,5 @@ static void __exit ampere_cspmu_exit(void) module_init(ampere_cspmu_init); module_exit(ampere_cspmu_exit); +MODULE_DESCRIPTION("Ampere SoC Performance Monitor Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index c318dc909767..2158a5975c90 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1427,4 +1427,5 @@ EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister); module_init(arm_cspmu_init); module_exit(arm_cspmu_exit); +MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index 5b84b701ad62..d0ef611240aa 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -417,4 +417,5 @@ static void __exit nvidia_cspmu_exit(void) module_init(nvidia_cspmu_init); module_exit(nvidia_cspmu_exit); +MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index 1f93a66eff5b..43d68b69e630 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -972,6 +972,7 @@ static __exit void cxl_pmu_exit(void) cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num); } +MODULE_DESCRIPTION("CXL Performance Monitor Driver"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(CXL); module_init(cxl_pmu_init); diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 1bbdb29743c4..746b92330ca7 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -850,4 +850,5 @@ static struct platform_driver imx_ddr_pmu_driver = { }; module_platform_driver(imx_ddr_pmu_driver); +MODULE_DESCRIPTION("Freescale i.MX8 DDR Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 6392cbedcd06..918cdc31de57 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -537,4 +537,5 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) } EXPORT_SYMBOL_GPL(hisi_pmu_init); +MODULE_DESCRIPTION("HiSilicon SoC uncore Performance Monitor driver framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c 
b/drivers/perf/marvell_cn10k_ddr_pmu.c index e2abca188dbe..94f1ebcd2a27 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -763,4 +763,5 @@ module_init(cn10k_ddr_pmu_init); module_exit(cn10k_ddr_pmu_exit); MODULE_AUTHOR("Bharat Bhushan "); +MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 9de9c4cc03add590f99aa5fabbe8f5850e34837c Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sun, 2 Jun 2024 18:24:59 +0500 Subject: selftests: arm64: tags_test: conform test to TAP output Conform the layout, informational and status messages to TAP. No functional change is intended other than the layout of output messages. Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240602132502.4186771-1-usama.anjum@collabora.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/tags/tags_test.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c index 955f87c1170d..8ae26e496c89 100644 --- a/tools/testing/selftests/arm64/tags/tags_test.c +++ b/tools/testing/selftests/arm64/tags/tags_test.c @@ -17,19 +17,21 @@ int main(void) static int tbi_enabled = 0; unsigned long tag = 0; struct utsname *ptr; - int err; + + ksft_print_header(); + ksft_set_plan(1); if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) tbi_enabled = 1; ptr = (struct utsname *)malloc(sizeof(*ptr)); if (!ptr) - ksft_exit_fail_msg("Failed to allocate utsname buffer\n"); + ksft_exit_fail_perror("Failed to allocate utsname buffer"); if (tbi_enabled) tag = 0x42; ptr = (struct utsname *)SET_TAG(ptr, tag); - err = uname(ptr); + ksft_test_result(!uname(ptr), "Syscall successful with tagged address\n"); free(ptr); - return err; + ksft_finished(); } -- cgit v1.2.3 From 6e3bc73be02ba604e5fd865008050a6dfc3d1232 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sun, 2 Jun 2024 18:25:00 +0500 Subject: selftests: arm64: tags: remove the result script The run_tags_test.sh script is used to run tags_test and print out if the test succeeded or failed. As tags_test has been TAP conformed, this script is unneeded and hence can be removed. Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240602132502.4186771-2-usama.anjum@collabora.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/tags/Makefile | 1 - tools/testing/selftests/arm64/tags/run_tags_test.sh | 12 ------------ 2 files changed, 13 deletions(-) delete mode 100755 tools/testing/selftests/arm64/tags/run_tags_test.sh diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile index 6d29cfde43a2..0a77f35295fb 100644 --- a/tools/testing/selftests/arm64/tags/Makefile +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -2,6 +2,5 @@ CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := tags_test -TEST_PROGS := run_tags_test.sh include ../../lib.mk diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh deleted file mode 100755 index 745f11379930..000000000000 --- a/tools/testing/selftests/arm64/tags/run_tags_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "--------------------" -echo "running tags test" -echo "--------------------" -./tags_test -if [ $? -ne 0 ]; then - echo "[FAIL]" -else - echo "[PASS]" -fi -- cgit v1.2.3
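The two selftest patches above leave a pattern that new arm64 selftests can follow. A minimal TAP-conformant skeleton in the same style, with a placeholder condition standing in for a real check:

#include "../../kselftest.h"

int main(void)
{
	ksft_print_header();
	ksft_set_plan(1);		/* declare how many results will be reported */

	/* Replace this placeholder condition with a real check. */
	ksft_test_result(1, "example check\n");

	ksft_finished();		/* print the summary and exit appropriately */
}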