diff options
author | Heiko Stuebner <heiko.stuebner@vrull.eu> | 2023-01-13 22:23:01 +0100 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2023-01-31 20:43:24 +0100 |
commit | b6fcdb191e36f82336f9b5e126d51c02e7323480 (patch) | |
tree | 9478e9f2e5141295105137ec3e62e1aa9e6141d2 /arch/riscv/lib/strlen.S | |
parent | RISC-V: add infrastructure to allow different str* implementations (diff) | |
download | linux-b6fcdb191e36f82336f9b5e126d51c02e7323480.tar.xz linux-b6fcdb191e36f82336f9b5e126d51c02e7323480.zip |
RISC-V: add zbb support to string functions
Add handling for the Zbb extension and use it to provide an
optimized variant of the string functions.
Support for the Zbb-str-variants is limited to the GNU-assembler
for now, as LLVM has not yet acquired the functionality to
selectively change the arch option in assembler code.
This is still under review at
https://reviews.llvm.org/D123515
Co-developed-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230113212301.3534711-3-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/lib/strlen.S')
-rw-r--r-- | arch/riscv/lib/strlen.S | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 0a3b11853efd..0f9dbf93301a 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -3,9 +3,14 @@
 #include <linux/linkage.h>
 #include <asm/asm.h>
 #include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
 
 /* int strlen(const char *s) */
 SYM_FUNC_START(strlen)
+
+	ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
 	/*
 	 * Returns
 	 *   a0 - string length
@@ -25,4 +30,104 @@ SYM_FUNC_START(strlen)
 2:
 	sub	a0, t1, a0
 	ret
+
+/*
+ * Variant of strlen using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+strlen_zbb:
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ	clz
+# define SHIFT	sll
+#else
+# define CZ	ctz
+# define SHIFT	srl
+#endif
+
+.option push
+.option arch,+zbb
+
+	/*
+	 * Returns
+	 *   a0 - string length
+	 *
+	 * Parameters
+	 *   a0 - String to measure
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3
+	 */
+
+	/* Number of irrelevant bytes in the first word. */
+	andi	t2, a0, SZREG-1
+
+	/* Align pointer. */
+	andi	t0, a0, -SZREG
+
+	li	t3, SZREG
+	sub	t3, t3, t2
+	slli	t2, t2, 3
+
+	/* Get the first word. */
+	REG_L	t1, 0(t0)
+
+	/*
+	 * Shift away the partial data we loaded to remove the irrelevant bytes
+	 * preceding the string with the effect of adding NUL bytes at the
+	 * end of the string's first word.
+	 */
+	SHIFT	t1, t1, t2
+
+	/* Convert non-NUL into 0xff and NUL into 0x00. */
+	orc.b	t1, t1
+
+	/* Convert non-NUL into 0x00 and NUL into 0xff. */
+	not	t1, t1
+
+	/*
+	 * Search for the first set bit (corresponding to a NUL byte in the
+	 * original chunk).
+	 */
+	CZ	t1, t1
+
+	/*
+	 * The first chunk is special: compare against the number
+	 * of valid bytes in this chunk.
+	 */
+	srli	a0, t1, 3
+	bgtu	t3, a0, 3f
+
+	/* Prepare for the word comparison loop. */
+	addi	t2, t0, SZREG
+	li	t3, -1
+
+	/*
+	 * Our critical loop is 4 instructions and processes data in
+	 * 4 byte or 8 byte chunks.
+	 */
+	.p2align 3
+1:
+	REG_L	t1, SZREG(t0)
+	addi	t0, t0, SZREG
+	orc.b	t1, t1
+	beq	t1, t3, 1b
+2:
+	not	t1, t1
+	CZ	t1, t1
+
+	/* Get number of processed words. */
+	sub	t2, t0, t2
+
+	/* Add number of characters in the first word. */
+	add	a0, a0, t2
+	srli	t1, t1, 3
+
+	/* Add number of characters in the last word. */
+	add	a0, a0, t1
+3:
+	ret
+
+.option pop
+#endif
 SYM_FUNC_END(strlen)