diff options
author | Nick Piggin <npiggin@suse.de> | 2008-10-21 10:59:15 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-23 18:18:20 +0200 |
commit | a8ddac7e53e89cb877965097d05adfeb1c91def3 (patch) | |
tree | db4ee686e50f7fb57b0cef20e0a8e7f06151e317 | |
parent | Merge git://git.kernel.org/pub/scm/linux/kernel/git/czankel/xtensa-2.6 (diff) | |
download | linux-a8ddac7e53e89cb877965097d05adfeb1c91def3.tar.xz linux-a8ddac7e53e89cb877965097d05adfeb1c91def3.zip |
mutex: speed up generic mutex implementations
- Atomic operations which both modify the variable and return something imply
full SMP memory barriers before and after the memory operations involved
(failing atomic_cmpxchg, atomic_add_unless, etc. don't imply a barrier because
they don't modify the target). See Documentation/atomic_ops.txt.
So remove extra barriers and branches.
- All architectures support atomic_cmpxchg. This has no relation to
__HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally.
This reduces a simple single-threaded fastpath lock+unlock test from 590 cycles
to 203 cycles on a ppc970 system.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/asm-generic/mutex-dec.h | 26 | ||||
-rw-r--r-- | include/asm-generic/mutex-xchg.h | 9 |
2 files changed, 3 insertions, 32 deletions
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index ed108be6743f..f104af7cf437 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) { if (unlikely(atomic_dec_return(count) < 0)) fail_fn(count); - else - smp_mb(); } /** @@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_dec_return(count) < 0)) return fail_fn(count); - else { - smp_mb(); - return 0; - } + return 0; } /** @@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - smp_mb(); if (unlikely(atomic_inc_return(count) <= 0)) fail_fn(count); } @@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - /* - * We have two variants here. The cmpxchg based one is the best one - * because it never induce a false contention state. It is included - * here because architectures using the inc/dec algorithms over the - * xchg ones are much more likely to support cmpxchg natively. - * - * If not we fall back to the spinlock based variant - that is - * just as efficient (and simpler) as a 'destructive' probing of - * the mutex state would be. 
- */ -#ifdef __HAVE_ARCH_CMPXCHG - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) { - smp_mb(); + if (likely(atomic_cmpxchg(count, 1, 0) == 1)) return 1; - } return 0; -#else - return fail_fn(count); -#endif } #endif diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index 7b9cd2cbfebe..580a6d35c700 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) { if (unlikely(atomic_xchg(count, 0) != 1)) fail_fn(count); - else - smp_mb(); } /** @@ -46,10 +44,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_xchg(count, 0) != 1)) return fail_fn(count); - else { - smp_mb(); - return 0; - } + return 0; } /** @@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - smp_mb(); if (unlikely(atomic_xchg(count, 1) != 0)) fail_fn(count); } @@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) if (prev < 0) prev = 0; } - smp_mb(); return prev; } |