summary | refs | log | tree | commit | diff | stats
path: root/arch/arm/include/asm/futex.h
diff options
context:
space:
mode:
author: Will Deacon <will.deacon@arm.com> 2014-02-21 17:01:48 +0100
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2014-02-25 12:30:20 +0100
commit: c32ffce0f66e5d1d4856254516e24f5ef275cd00 (patch)
tree: 125229cdd38bfd6e7e62cff7eb8771a34cc999a7 /arch/arm/include/asm/futex.h
parent: ARM: 7979/1: mm: Remove hugetlb warning from Coherent DMA allocator (diff)
download: linux-c32ffce0f66e5d1d4856254516e24f5ef275cd00.tar.xz
linux-c32ffce0f66e5d1d4856254516e24f5ef275cd00.zip
ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw, it turns out that issuing the pldw *after* the dmb instruction can give modest performance gains (~3% atomic_add_return improvement on a dual A15).

This patch adds prefetchw invocations to our barriered atomic operations including cmpxchg, test_and_xxx and futexes.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/include/asm/futex.h')
-rw-r--r-- arch/arm/include/asm/futex.h | 3
1 file changed, 3 insertions, 0 deletions
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 2aff798fbef4..53e69dae796f 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -23,6 +23,7 @@
#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \
smp_mb(); \
+ prefetchw(uaddr); \
__asm__ __volatile__( \
"1: ldrex %1, [%3]\n" \
" " insn "\n" \
@@ -46,6 +47,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return -EFAULT;
smp_mb();
+ /* Prefetching cannot fault */
+ prefetchw(uaddr);
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: ldrex %1, [%4]\n"
" teq %1, %2\n"