summaryrefslogtreecommitdiffstats
path: root/arch/riscv/kernel/cpufeature.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/kernel/cpufeature.c')
-rw-r--r--arch/riscv/kernel/cpufeature.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ef7b4fd9e876..1cfbba65d11a 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -18,12 +18,19 @@
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>
+#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/vector.h>
+#include "copy-unaligned.h"
+
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@@ -549,6 +556,103 @@ unsigned long riscv_get_elf_hwcap(void)
return hwcap;
}
+void check_unaligned_access(int cpu)
+{
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
+
+ page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
+ if (!page) {
+ pr_warn("Can't alloc pages to measure memcpy performance");
+ return;
+ }
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+ /* Do a warmup. */
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ preempt_disable();
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder WRT to the copy. */
+ mb();
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ mb();
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ preempt_enable();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+ cpu);
+
+ goto out;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
+
+ per_cpu(misaligned_access_speed, cpu) = speed;
+
+out:
+ __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
+}
+
+static int check_unaligned_access_boot_cpu(void)
+{
+ check_unaligned_access(0);
+ return 0;
+}
+
+arch_initcall(check_unaligned_access_boot_cpu);
+
#ifdef CONFIG_RISCV_ALTERNATIVE
/*
* Alternative patch sites consider 48 bits when determining when to patch