diff options
Diffstat (limited to 'arch/riscv/kernel/cpufeature.c')
-rw-r--r-- | arch/riscv/kernel/cpufeature.c | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index ef7b4fd9e876..1cfbba65d11a 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -18,12 +18,19 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> +#include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/processor.h> #include <asm/vector.h> +#include "copy-unaligned.h" + #define NUM_ALPHA_EXTS ('z' - 'a' + 1) +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -549,6 +556,103 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } +void check_unaligned_access(int cpu) +{ + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); + if (!page) { + pr_warn("Can't alloc pages to measure memcpy performance"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + goto out; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + +out: + __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); +} + +static int check_unaligned_access_boot_cpu(void) +{ + check_unaligned_access(0); + return 0; +} + +arch_initcall(check_unaligned_access_boot_cpu); + #ifdef CONFIG_RISCV_ALTERNATIVE /* * Alternative patch sites consider 48 bits when determining when to patch |