x86: Fix write lock scalability 64-bit issue

With the write lock path simply subtracting RW_LOCK_BIAS there is, on large systems, the theoretical possibility of overflowing the 32-bit value that was used so far (namely if 128 or more CPUs manage to do the subtraction, but don't get to do the inverse addition in the failure path quickly enough). A first measure is to modify RW_LOCK_BIAS itself - with the new value chosen, it is good for up to 2048 CPUs each allowed to nest over 2048 times on the read path without causing an issue. Quite possibly it would even be sufficient to adjust the bias a little further, assuming that allowing for significantly less nesting would suffice. However, as the original value chosen allowed for even more nesting levels, to support more than 2048 CPUs (possible currently only for 64-bit kernels) the lock itself gets widened to 64 bits. Signed-off-by: Jan Beulich <jbeulich@novell.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4E258E0D020000780004E3F0@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Jan Beulich <JBeulich@novell.com> 2011-07-19 14:00:45 +0200
committer: Ingo Molnar <mingo@elte.hu> 2011-07-21 09:03:36 +0200
commit: a750036f35cda160ef77408ec92c3dc41f8feebb (patch)
tree: 1198013e1289dfb9b5a299388ee09515642c4030 /arch/x86/include/asm/spinlock.h
parent: x86: Unify rwsem assembly implementation (diff)
download: linux-a750036f35cda160ef77408ec92c3dc41f8feebb.tar.xz
linux-a750036f35cda160ef77408ec92c3dc41f8feebb.zip
1 files changed, 22 insertions, 15 deletions
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 3089f70c0c52..e9e51f710e6c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_SPINLOCK_H
 
 #include <asm/atomic.h>
-#include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
@@ -234,7 +233,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
  */
 static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
-	return (int)(lock)->lock > 0;
+	return lock->lock > 0;
 }
 
 /**
@@ -243,12 +242,12 @@ static inline int arch_read_can_lock(arch_rwlock_t *lock)
  */
 static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
-	return (lock)->lock == RW_LOCK_BIAS;
+	return lock->write == WRITE_LOCK_CMP;
 }
 
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
@@ -257,47 +256,55 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
+		     : "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;
 
-	if (atomic_dec_return(count) >= 0)
+	if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
 		return 1;
-	atomic_inc(count);
+	READ_LOCK_ATOMIC(inc)(count);
 	return 0;
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	atomic_t *count = (atomic_t *)&lock->write;
 
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+	if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
 		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
+	atomic_add(WRITE_LOCK_CMP, count);
 	return 0;
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
+		     :"+m" (rw->lock) : : "memory");
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl %1, %0"
-		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
+		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
+#undef READ_LOCK_SIZE
+#undef READ_LOCK_ATOMIC
+#undef WRITE_LOCK_ADD
+#undef WRITE_LOCK_SUB
+#undef WRITE_LOCK_CMP
+
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
author	Jan Beulich <JBeulich@novell.com>	2011-07-19 14:00:45 +0200
committer	Ingo Molnar <mingo@elte.hu>	2011-07-21 09:03:36 +0200
commit	a750036f35cda160ef77408ec92c3dc41f8feebb (patch)
tree	1198013e1289dfb9b5a299388ee09515642c4030 /arch/x86/include/asm/spinlock.h
parent	x86: Unify rwsem assembly implementation (diff)
download	linux-a750036f35cda160ef77408ec92c3dc41f8feebb.tar.xz linux-a750036f35cda160ef77408ec92c3dc41f8feebb.zip