summaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm/xor_64.h
diff options
context:
space:
mode:
author: Jan Beulich <JBeulich@suse.com> 2012-11-02 15:20:24 +0100
committer: Ingo Molnar <mingo@kernel.org> 2013-01-25 09:23:50 +0100
commit: f317820cb6ee3fb173319bf76e0e62437be78ad2 (patch)
tree: fc57358da4ba9f11a8d80e508d01e99c2c62c1f9 /arch/x86/include/asm/xor_64.h
parent: x86/xor: Unify SSE-base xor-block routines (diff)
download: linux-f317820cb6ee3fb173319bf76e0e62437be78ad2.tar.xz
          linux-f317820cb6ee3fb173319bf76e0e62437be78ad2.zip
x86/xor: Add alternative SSE implementation only prefetching once per 64-byte line
On CPUs with 64-byte last level cache lines, this yields roughly 10% better performance, independent of CPU vendor or specific model (as far as I was able to test).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/5093E4B802000078000A615E@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/include/asm/xor_64.h')
-rw-r--r-- arch/x86/include/asm/xor_64.h 10
1 files changed, 4 insertions, 6 deletions
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 1baf89dcc423..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -13,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
/* Also try the AVX routines */
#include <asm/xor_avx.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
+ xor_speed(&xor_block_sse_pf64); \
xor_speed(&xor_block_sse); \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(&xor_block_sse)
-
#endif /* _ASM_X86_XOR_64_H */