From 648eee52ccdc623e21b920d6048e93490a4860a7 Mon Sep 17 00:00:00 2001 From: Robin Getz Date: Tue, 4 May 2010 14:59:21 +0000 Subject: Blackfin: optimize strncpy a bit Add a little strncpy optimization which can easily cut boot time by 20%. When the kernel is booting with initramfs, it builds up the filesystem from a cpio archive by calling strncpy_from_user() via fs/namei.c's do_getname() on every file in the archive (of which there can be many) with a length of PATH_MAX (1024). This causes the destination of the strncpy to be padded with many NUL bytes. Most of the gain from this optimization comes from writing that NUL padding with a call to memset(), which is already optimized for filling memory quickly, but the hardware loop helps a little bit as well. Boot time was measured with 'loglevel=0' so that UART speed doesn't get in the way. Signed-off-by: Robin Getz Signed-off-by: Mike Frysinger --- arch/blackfin/lib/memset.S | 1 + arch/blackfin/lib/strncpy.S | 59 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 47 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/lib/memset.S b/arch/blackfin/lib/memset.S index c30d99b10969..eab1bef3f5bf 100644 --- a/arch/blackfin/lib/memset.S +++ b/arch/blackfin/lib/memset.S @@ -20,6 +20,7 @@ * R1 = filler byte * R2 = count * Favours word aligned data. 
+ * The strncpy assumes that I0 and I1 are not used in this function */ ENTRY(_memset) diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index 39fbbe6523e5..f3931d50b4a7 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S @@ -5,12 +5,14 @@ */ #include +#include /* void *strncpy(char *dest, const char *src, size_t n); * R0 = address (dest) * R1 = address (src) * R2 = size - * Returns a pointer to the destination string dest + * Returns a pointer (R0) to the destination string dest + * we do this by not changing R0 */ #ifdef CONFIG_STRNCPY_L1 @@ -24,29 +26,60 @@ ENTRY(_strncpy) CC = R2 == 0; if CC JUMP 4f; + + P2 = R2 ; /* size */ P0 = R0 ; /* dst*/ P1 = R1 ; /* src*/ + LSETUP (1f, 2f) LC0 = P2; 1: R1 = B [P1++] (Z); B [P0++] = R1; - CC = R1; - if ! cc jump 2f; - R2 += -1; - CC = R2 == 0; - if ! cc jump 1b (bp); - jump 4f; + CC = R1 == 0; 2: - /* if src is shorter than n, we need to null pad bytes in dest */ - R1 = 0; + if CC jump 3f; + + RTS; + + /* if src is shorter than n, we need to null pad bytes in dest + * but, we can get here when the last byte is zero, and we don't + * want to copy an extra byte at the end, so we need to check + */ 3: + R2 = LC0; + CC = R2 + if ! CC jump 6f; + + /* if the required null padded portion is small, do it here, rather than + * handling the overhead of memset (which is OK when things are big). + */ + R3 = 0x20; + CC = R2 < R3; + IF CC jump 4f; + R2 += -1; - CC = R2 == 0; - if cc jump 4f; - B [P0++] = R1; - jump 3b; + + /* Set things up for memset + * R0 = address + * R1 = filler byte (this case it's zero, set above) + * R2 = count (set above) + */ + + I1 = R0; + R0 = RETS; + I0 = R0; + R0 = P0; + pseudo_long_call _memset, p0; + R0 = I0; + RETS = R0; + R0 = I1; + RTS; 4: + LSETUP(5f, 5f) LC0; +5: + B [P0++] = R1; +6: RTS; ENDPROC(_strncpy) -- cgit v1.2.3