diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2023-01-31 20:43:27 +0100 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2023-01-31 20:43:27 +0100 |
commit | 75ab93a244a516d1d3c03c4e27d5d0deff76ebfb (patch) | |
tree | c051b064adbfa73337dea354543faab0de1a9359 /arch/riscv | |
parent | RISC-V: Fix do_notify_resume / do_work_pending prototype (diff) | |
parent | RISC-V: add zbb support to string functions (diff) | |
download | linux-75ab93a244a516d1d3c03c4e27d5d0deff76ebfb.tar.xz linux-75ab93a244a516d1d3c03c4e27d5d0deff76ebfb.zip |
Merge patch series "Zbb string optimizations"
Heiko Stuebner <heiko@sntech.de> says:
From: Heiko Stuebner <heiko.stuebner@vrull.eu>
This series still tries to allow optimized string functions for specific
extensions. The last approach of using an inline base function to hold
the alternative calls did cause some issues in a number of places
So instead of that we're now just using an alternative j at the beginning
of the generic function to jump to a separate place inside the function
itself.
* b4-shazam-merge:
RISC-V: add zbb support to string functions
RISC-V: add infrastructure to allow different str* implementations
Link: https://lore.kernel.org/r/20230113212301.3534711-1-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv')
-rw-r--r-- | arch/riscv/Kconfig | 24 | ||||
-rw-r--r-- | arch/riscv/include/asm/errata_list.h | 3 | ||||
-rw-r--r-- | arch/riscv/include/asm/hwcap.h | 1 | ||||
-rw-r--r-- | arch/riscv/include/asm/string.h | 10 | ||||
-rw-r--r-- | arch/riscv/kernel/cpu.c | 1 | ||||
-rw-r--r-- | arch/riscv/kernel/cpufeature.c | 18 | ||||
-rw-r--r-- | arch/riscv/kernel/riscv_ksyms.c | 3 | ||||
-rw-r--r-- | arch/riscv/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/riscv/lib/strcmp.S | 121 | ||||
-rw-r--r-- | arch/riscv/lib/strlen.S | 133 | ||||
-rw-r--r-- | arch/riscv/lib/strncmp.S | 139 | ||||
-rw-r--r-- | arch/riscv/purgatory/Makefile | 13 |
12 files changed, 468 insertions, 1 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e2b656043abf..7c814fbf9527 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -416,6 +416,30 @@ config RISCV_ISA_SVPBMT If you don't know what to do here, say Y. +config TOOLCHAIN_HAS_ZBB + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_IS_GNU + +config RISCV_ISA_ZBB + bool "Zbb extension support for bit manipulation instructions" + depends on TOOLCHAIN_HAS_ZBB + depends on !XIP_KERNEL && MMU + select RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZBB + extension (basic bit manipulation) and enable its usage. + + The Zbb extension provides instructions to accelerate a number + of bit-specific operations (count bit population, sign extending, + bitrotation, etc). + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZICBOM bool default y diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 4180312d2a70..95e626b7281e 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -24,7 +24,8 @@ #define CPUFEATURE_SVPBMT 0 #define CPUFEATURE_ZICBOM 1 -#define CPUFEATURE_NUMBER 2 +#define CPUFEATURE_ZBB 2 +#define CPUFEATURE_NUMBER 3 #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 57439da71c77..462d6cde9bac 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -58,6 +58,7 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_SSTC, RISCV_ISA_EXT_SVINVAL, RISCV_ISA_EXT_SVPBMT, + RISCV_ISA_EXT_ZBB, RISCV_ISA_EXT_ZICBOM, RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_ID_MAX diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 909049366555..a96b1fea24fe 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMMOVE extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t); + +#define __HAVE_ARCH_STRCMP +extern asmlinkage int strcmp(const char *cs, const char *ct); + +#define __HAVE_ARCH_STRLEN +extern asmlinkage __kernel_size_t strlen(const char *); + +#define __HAVE_ARCH_STRNCMP +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); + /* For those files which don't want to check by kasan. */ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) #define memcpy(dst, src, len) __memcpy(dst, src, len) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 0bf1c7f663fc..420228e219f7 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -185,6 +185,7 @@ arch_initcall(riscv_cpuinfo_init); * New entries to this struct should follow the ordering rules described above. */ static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index dde0e91d7668..9899806cef29 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -227,6 +227,7 @@ void __init riscv_fill_hwcap(void) SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); + SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB); SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); } @@ -302,6 +303,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage) return true; } +static bool __init_or_module cpufeature_probe_zbb(unsigned int stage) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + if (!riscv_isa_extension_available(NULL, ZBB)) + return false; + + return true; +} + /* * Probe presence of individual extensions. * @@ -320,6 +335,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) if (cpufeature_probe_zicbom(stage)) cpu_req_feature |= BIT(CPUFEATURE_ZICBOM); + if (cpufeature_probe_zbb(stage)) + cpu_req_feature |= BIT(CPUFEATURE_ZBB); + return cpu_req_feature; } diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 5ab1c7e1a6ed..a72879b4249a 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,6 +12,9 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 25d5c9664e57..6c74b0bedd60 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,9 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +lib-y += strcmp.o +lib-y += strlen.o +lib-y += strncmp.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S new file mode 100644 index 000000000000..8148b6418f61 --- /dev/null +++ b/arch/riscv/lib/strcmp.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> +#include <asm/alternative-macros.h> +#include <asm/errata_list.h> + +/* int strcmp(const char *cs, const char *ct) */ +SYM_FUNC_START(strcmp) + + ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + + /* + * Returns + * a0 - comparison result, value like strcmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * + * Clobbers + * t0, t1 + */ +1: + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 2f + bnez t0, 1b + li a0, 0 + ret +2: + /* + * strcmp only needs to return (< 0, 0, > 0) values + * not necessarily -1, 0, +1 + */ + sub a0, t0, t1 + ret + +/* + * Variant of strcmp using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strcmp_zbb: + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - comparison result, value like strcmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * + * Clobbers + * t0, t1, t2, t3, t4, t5 + */ + + or t2, a0, a1 + li t4, -1 + and t2, t2, SZREG-1 + bnez t2, 3f + + /* Main loop for aligned string. */ + .p2align 3 +1: + REG_L t0, 0(a0) + REG_L t1, 0(a1) + orc.b t3, t0 + bne t3, t4, 2f + addi a0, a0, SZREG + addi a1, a1, SZREG + beq t0, t1, 1b + + /* + * Words don't match, and no null byte in the first + * word. Get bytes in big-endian order and compare. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + rev8 t0, t0 + rev8 t1, t1 +#endif + + /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */ + sltu a0, t0, t1 + neg a0, a0 + ori a0, a0, 1 + ret + +2: + /* + * Found a null byte. + * If words don't match, fall back to simple loop. + */ + bne t0, t1, 3f + + /* Otherwise, strings are equal. */ + li a0, 0 + ret + + /* Simple loop for misaligned strings. */ + .p2align 3 +3: + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 4f + bnez t0, 3b + +4: + sub a0, t0, t1 + ret + +.option pop +#endif +SYM_FUNC_END(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S new file mode 100644 index 000000000000..0f9dbf93301a --- /dev/null +++ b/arch/riscv/lib/strlen.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> +#include <asm/alternative-macros.h> +#include <asm/errata_list.h> + +/* int strlen(const char *s) */ +SYM_FUNC_START(strlen) + + ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + + /* + * Returns + * a0 - string length + * + * Parameters + * a0 - String to measure + * + * Clobbers: + * t0, t1 + */ + mv t1, a0 +1: + lbu t0, 0(t1) + beqz t0, 2f + addi t1, t1, 1 + j 1b +2: + sub a0, t1, a0 + ret + +/* + * Variant of strlen using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strlen_zbb: + +#ifdef CONFIG_CPU_BIG_ENDIAN +# define CZ clz +# define SHIFT sll +#else +# define CZ ctz +# define SHIFT srl +#endif + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - string length + * + * Parameters + * a0 - String to measure + * + * Clobbers + * t0, t1, t2, t3 + */ + + /* Number of irrelevant bytes in the first word. */ + andi t2, a0, SZREG-1 + + /* Align pointer. */ + andi t0, a0, -SZREG + + li t3, SZREG + sub t3, t3, t2 + slli t2, t2, 3 + + /* Get the first word. */ + REG_L t1, 0(t0) + + /* + * Shift away the partial data we loaded to remove the irrelevant bytes + * preceding the string with the effect of adding NUL bytes at the + * end of the string's first word. + */ + SHIFT t1, t1, t2 + + /* Convert non-NUL into 0xff and NUL into 0x00. */ + orc.b t1, t1 + + /* Convert non-NUL into 0x00 and NUL into 0xff. */ + not t1, t1 + + /* + * Search for the first set bit (corresponding to a NUL byte in the + * original chunk). + */ + CZ t1, t1 + + /* + * The first chunk is special: compare against the number + * of valid bytes in this chunk. + */ + srli a0, t1, 3 + bgtu t3, a0, 3f + + /* Prepare for the word comparison loop. */ + addi t2, t0, SZREG + li t3, -1 + + /* + * Our critical loop is 4 instructions and processes data in + * 4 byte or 8 byte chunks. + */ + .p2align 3 +1: + REG_L t1, SZREG(t0) + addi t0, t0, SZREG + orc.b t1, t1 + beq t1, t3, 1b +2: + not t1, t1 + CZ t1, t1 + + /* Get number of processed words. */ + sub t2, t0, t2 + + /* Add number of characters in the first word. */ + add a0, a0, t2 + srli t1, t1, 3 + + /* Add number of characters in the last word. */ + add a0, a0, t1 +3: + ret + +.option pop +#endif +SYM_FUNC_END(strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S new file mode 100644 index 000000000000..7940ddab2d48 --- /dev/null +++ b/arch/riscv/lib/strncmp.S @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> +#include <asm/alternative-macros.h> +#include <asm/errata_list.h> + +/* int strncmp(const char *cs, const char *ct, size_t count) */ +SYM_FUNC_START(strncmp) + + ALTERNATIVE("nop", "j strncmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB) + + /* + * Returns + * a0 - comparison result, value like strncmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * a2 - number of characters to compare + * + * Clobbers + * t0, t1, t2 + */ + li t2, 0 +1: + beq a2, t2, 2f + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 3f + addi t2, t2, 1 + bnez t0, 1b +2: + li a0, 0 + ret +3: + /* + * strncmp only needs to return (< 0, 0, > 0) values + * not necessarily -1, 0, +1 + */ + sub a0, t0, t1 + ret + +/* + * Variant of strncmp using the ZBB extension if available + */ +#ifdef CONFIG_RISCV_ISA_ZBB +strncmp_zbb: + +.option push +.option arch,+zbb + + /* + * Returns + * a0 - comparison result, like strncmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * a2 - number of characters to compare + * + * Clobbers + * t0, t1, t2, t3, t4, t5, t6 + */ + + or t2, a0, a1 + li t5, -1 + and t2, t2, SZREG-1 + add t4, a0, a2 + bnez t2, 4f + + /* Adjust limit for fast-path. */ + andi t6, t4, -SZREG + + /* Main loop for aligned string. */ + .p2align 3 +1: + bgt a0, t6, 3f + REG_L t0, 0(a0) + REG_L t1, 0(a1) + orc.b t3, t0 + bne t3, t5, 2f + addi a0, a0, SZREG + addi a1, a1, SZREG + beq t0, t1, 1b + + /* + * Words don't match, and no null byte in the first + * word. Get bytes in big-endian order and compare. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + rev8 t0, t0 + rev8 t1, t1 +#endif + + /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */ + sltu a0, t0, t1 + neg a0, a0 + ori a0, a0, 1 + ret + +2: + /* + * Found a null byte. + * If words don't match, fall back to simple loop. + */ + bne t0, t1, 3f + + /* Otherwise, strings are equal. */ + li a0, 0 + ret + + /* Simple loop for misaligned strings. */ +3: + /* Restore limit for slow-path. */ + .p2align 3 +4: + bge a0, t4, 6f + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 5f + bnez t0, 4b + +5: + sub a0, t0, t1 + ret + +6: + li a0, 0 + ret + +.option pop +#endif +SYM_FUNC_END(strncmp) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index dd58e1d99397..d16bf715a586 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -2,6 +2,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o +purgatory-y += strcmp.o strlen.o strncmp.o targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE $(call if_changed_rule,as_o_S) +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE + $(call if_changed_rule,as_o_S) + $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) |