From 27a84793e42084392181ef2ef51a954f1cf0c519 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2013 12:10:42 +0100 Subject: ARM: smp_on_up: move inline asm ALT_SMP patching macro out of spinlock.h Patching UP/SMP alternatives inside inline assembly blocks is useful outside of the spinlock implementation, where it is used for sev and wfe. This patch lifts the macro into processor.h and gives it a scarier name to (a) avoid conflicts in the global namespace and (b) to try and deter its usage unless you "know what you're doing". The W macro for generating wide instructions when targeting Thumb-2 is also made available under the name WASM, to reduce the potential for conflicts with other headers. Acked-by: Nicolas Pitre Signed-off-by: Will Deacon --- arch/arm/include/asm/spinlock.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/arm/include/asm/spinlock.h') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4f2c28060c9a..e1ce45230913 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -11,15 +11,7 @@ * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K * extensions, so when running on UP, we have to patch these instructions away. */ -#define ALT_SMP(smp, up) \ - "9998: " smp "\n" \ - " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ - " " up "\n" \ - " .popsection\n" - #ifdef CONFIG_THUMB2_KERNEL -#define SEV ALT_SMP("sev.w", "nop.w") /* * For Thumb-2, special care is needed to ensure that the conditional WFE * instruction really does assemble to exactly 4 bytes (as required by @@ -31,17 +23,18 @@ * the assembler won't change IT instructions which are explicitly present * in the input. 
*/ -#define WFE(cond) ALT_SMP( \ +#define WFE(cond) __ALT_SMP_ASM( \ "it " cond "\n\t" \ "wfe" cond ".n", \ \ "nop.w" \ ) #else -#define SEV ALT_SMP("sev", "nop") -#define WFE(cond) ALT_SMP("wfe" cond, "nop") +#define WFE(cond) __ALT_SMP_ASM("wfe" cond, "nop") #endif +#define SEV __ALT_SMP_ASM(WASM(sev), WASM(nop)) + static inline void dsb_sev(void) { #if __LINUX_ARM_ARCH__ >= 7 -- cgit v1.2.3 From 9bb17be062de6f5a9c9643258951aa0935652ec3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2013 14:54:33 +0100 Subject: ARM: locks: prefetch the destination word for write prior to strex The cost of changing a cacheline from shared to exclusive state can be significant, especially when this is triggered by an exclusive store, since it may result in having to retry the transaction. This patch prefixes our {spin,read,write}_[try]lock implementations with pldw instructions (on CPUs which support them) to try and grab the line in exclusive state from the start. arch_rwlock_t is changed to avoid using a volatile member, since this generates compiler warnings when falling back on the __builtin_prefetch intrinsic which expects a const void * argument. Acked-by: Nicolas Pitre Signed-off-by: Will Deacon --- arch/arm/include/asm/spinlock.h | 13 ++++++++++--- arch/arm/include/asm/spinlock_types.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/arm/include/asm/spinlock.h') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index e1ce45230913..499900781d59 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -5,7 +5,7 @@ #error SMP not supported on pre-ARMv6 CPUs #endif -#include +#include /* * sev and wfe are ARMv6K extensions. 
Uniprocessor ARMv6 may not have the K @@ -70,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) u32 newval; arch_spinlock_t lockval; + prefetchw(&lock->slock); __asm__ __volatile__( "1: ldrex %0, [%3]\n" " add %1, %0, %4\n" @@ -93,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned long contended, res; u32 slock; + prefetchw(&lock->slock); do { __asm__ __volatile__( " ldrex %0, [%3]\n" @@ -145,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp; + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%1]\n" " teq %0, #0\n" @@ -163,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned long contended, res; + prefetchw(&rw->lock); do { __asm__ __volatile__( " ldrex %0, [%2]\n" @@ -196,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) } /* write_can_lock - would write_trylock() succeed? */ -#define arch_write_can_lock(x) ((x)->lock == 0) +#define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) /* * Read locks are a bit more hairy: @@ -214,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" @@ -234,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) smp_mb(); + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%2]\n" " sub %0, %0, #1\n" @@ -252,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned long contended, res; + prefetchw(&rw->lock); do { __asm__ __volatile__( " ldrex %0, [%2]\n" @@ -273,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } /* read_can_lock - would read_trylock() succeed? 
*/ -#define arch_read_can_lock(x) ((x)->lock < 0x80000000) +#define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index b262d2f8b478..47663fcb10ad 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -25,7 +25,7 @@ typedef struct { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { - volatile unsigned int lock; + u32 lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } -- cgit v1.2.3 From 0cbad9c9dfe0c38e8ec7385b39087c005a6dee3e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 17:19:22 +0100 Subject: ARM: 7854/1: lockref: add support for lockless lockrefs using cmpxchg64 Our spinlocks are only 32-bit (2x16-bit tickets) and, on processors with 64-bit atomic instructions, cmpxchg64 makes use of the double-word exclusive accessors. This patch wires up the cmpxchg-based lockless lockref implementation for ARM. 
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/spinlock.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm/include/asm/spinlock.h') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1ad6fb6c094d..fc184bcd7848 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4f2c28060c9a..ed6c22919e47 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -127,10 +127,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) dsb_sev(); } +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.tickets.owner == lock.tickets.next; +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); - return tickets.owner != tickets.next; + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) -- cgit v1.2.3