powerpc/lib: Adjust .balign inside string functions for PPC32

commit 87a156fb18fe1 ("Align hot loops of some string functions") degraded the performance of string functions by adding useless nops. A simple benchmark on an 8xx calling memchr() 100000 times, with a match on the first byte, runs in 41668 TB ticks before this patch and in 35986 TB ticks after this patch. So this gives an improvement of approx 10%. Another benchmark doing the same with a memchr() matching the 128th byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks after this patch, so regardless of the number of loop iterations, removing those useless nops improves the test by 5683 TB ticks. Fixes: 87a156fb18fe1 ("Align hot loops of some string functions") Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
author: Christophe Leroy <christophe.leroy@c-s.fr> 2018-05-18 15:01:16 +0200
committer: Michael Ellerman <mpe@ellerman.id.au> 2018-06-03 16:39:19 +0200
commit: 1128bb7813a896bd608fb622eee3c26aaf33b473 (patch)
tree: 38866fc629ccaa69be843be50329319f7b0ed033 /arch/powerpc/lib
parent: powerpc/signal32: Use fault_in_pages_readable() to prefault user context (diff)
download: linux-1128bb7813a896bd608fb622eee3c26aaf33b473.tar.xz
linux-1128bb7813a896bd608fb622eee3c26aaf33b473.zip
1 files changed, 4 insertions, 3 deletions
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index a787776822d8..0378def28d41 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -12,6 +12,7 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/cache.h>
 
 	.text
 	
@@ -23,7 +24,7 @@ _GLOBAL(strncpy)
 	mtctr	r5
 	addi	r6,r3,-1
 	addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r4)
 	cmpwi	0,r0,0
 	stbu	r0,1(r6)
@@ -43,7 +44,7 @@ _GLOBAL(strncmp)
 	mtctr	r5
 	addi	r5,r3,-1
 	addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r3,1(r5)
 	cmpwi	1,r3,0
 	lbzu	r0,1(r4)
@@ -77,7 +78,7 @@ _GLOBAL(memchr)
 	beq-	2f
 	mtctr	r5
 	addi	r3,r3,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r3)
 	cmpw	0,r0,r4
 	bdnzf	2,1b
author	Christophe Leroy <christophe.leroy@c-s.fr>	2018-05-18 15:01:16 +0200
committer	Michael Ellerman <mpe@ellerman.id.au>	2018-06-03 16:39:19 +0200
commit	1128bb7813a896bd608fb622eee3c26aaf33b473 (patch)
tree	38866fc629ccaa69be843be50329319f7b0ed033 /arch/powerpc/lib
parent	powerpc/signal32: Use fault_in_pages_readable() to prefault user context (diff)
download	linux-1128bb7813a896bd608fb622eee3c26aaf33b473.tar.xz linux-1128bb7813a896bd608fb622eee3c26aaf33b473.zip