diff options
author | Waiman Long <Waiman.Long@hp.com> | 2015-04-24 20:56:37 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-05-08 12:37:05 +0200 |
commit | a23db284fe0d1879ca2002bf31077b5efa2fe2ca (patch) | |
tree | 816042635ecb0462f1cb3f35bc8091f1cd46ce19 /kernel/locking/qspinlock.c | |
parent | locking/qspinlock: Revert to test-and-set on hypervisors (diff) | |
download | linux-a23db284fe0d1879ca2002bf31077b5efa2fe2ca.tar.xz linux-a23db284fe0d1879ca2002bf31077b5efa2fe2ca.zip |
locking/pvqspinlock: Implement simple paravirt support for the qspinlock
Provide a separate (second) version of the spin_lock_slowpath for
paravirt along with a special unlock path.
The second slowpath is generated by adding a few pv hooks to the
normal slowpath, but where those will compile away for the native
case, they expand into special wait/wake code for the pv version.
The actual MCS queue can use extra storage in the mcs_nodes[] array to
keep track of state and therefore uses directed wakeups.
The head contender has no such storage directly visible to the
unlocker. So the unlocker searches a hash table with open addressing
using a simple binary Galois linear feedback shift register.
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Daniel J Blueman <daniel@numascale.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paolo Bonzini <paolo.bonzini@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1429901803-29771-9-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r-- | kernel/locking/qspinlock.c | 68 |
1 files changed, 67 insertions, 1 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index fd31a474145d..38c49202d532 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -18,6 +18,9 @@ * Authors: Waiman Long <waiman.long@hp.com> * Peter Zijlstra <peterz@infradead.org> */ + +#ifndef _GEN_PV_LOCK_SLOWPATH + #include <linux/smp.h> #include <linux/bug.h> #include <linux/cpumask.h> @@ -65,13 +68,21 @@ #include "mcs_spinlock.h" +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define MAX_NODES 8 +#else +#define MAX_NODES 4 +#endif + /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. * * Exactly fits one 64-byte cacheline on a 64-bit architecture. + * + * PV doubles the storage and uses the second cacheline for PV state. */ -static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]); +static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); /* * We must be able to distinguish between no-tail and the tail at 0:0, @@ -220,6 +231,32 @@ static __always_inline void set_locked(struct qspinlock *lock) WRITE_ONCE(l->locked, _Q_LOCKED_VAL); } + +/* + * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for + * all the PV callbacks. + */ + +static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } +static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { } +static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { } + +static __always_inline void __pv_wait_head(struct qspinlock *lock, + struct mcs_spinlock *node) { } + +#define pv_enabled() false + +#define pv_init_node __pv_init_node +#define pv_wait_node __pv_wait_node +#define pv_kick_node __pv_kick_node +#define pv_wait_head __pv_wait_head + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath +#endif + +#endif /* _GEN_PV_LOCK_SLOWPATH */ + /** * queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure @@ -249,6 +286,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + if (pv_enabled()) + goto queue; + if (virt_queued_spin_lock(lock)) return; @@ -325,6 +365,7 @@ queue: node += idx; node->locked = 0; node->next = NULL; + pv_init_node(node); /* * We touched a (possibly) cold cacheline in the per-cpu queue node; @@ -350,6 +391,7 @@ queue: prev = decode_tail(old); WRITE_ONCE(prev->next, node); + pv_wait_node(node); arch_mcs_spin_lock_contended(&node->locked); } @@ -365,6 +407,7 @@ queue: * does not imply a full barrier. * */ + pv_wait_head(lock, node); while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK) cpu_relax(); @@ -397,6 +440,7 @@ queue: cpu_relax(); arch_mcs_spin_unlock_contended(&next->locked); + pv_kick_node(next); release: /* @@ -405,3 +449,25 @@ release: this_cpu_dec(mcs_nodes[0].count); } EXPORT_SYMBOL(queued_spin_lock_slowpath); + +/* + * Generate the paravirt code for queued_spin_unlock_slowpath(). + */ +#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#define _GEN_PV_LOCK_SLOWPATH + +#undef pv_enabled +#define pv_enabled() true + +#undef pv_init_node +#undef pv_wait_node +#undef pv_kick_node +#undef pv_wait_head + +#undef queued_spin_lock_slowpath +#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath + +#include "qspinlock_paravirt.h" +#include "qspinlock.c" + +#endif |