summaryrefslogtreecommitdiffstats
path: root/kernel/locking/qspinlock.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--kernel/locking/qspinlock.c66
1 files changed, 50 insertions, 16 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index e17efe7b8d4d..033872113ebb 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -105,24 +105,37 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
* By using the whole 2nd least significant byte for the pending bit, we
* can allow better optimization of the lock acquisition for the pending
* bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
*/
-#if _Q_PENDING_BITS == 8
-
struct __qspinlock {
union {
atomic_t val;
- struct {
#ifdef __LITTLE_ENDIAN
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
u16 locked_pending;
u16 tail;
+ };
#else
+ struct {
u16 tail;
u16 locked_pending;
-#endif
};
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
};
};
+#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
@@ -195,6 +208,19 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
#endif /* _Q_PENDING_BITS == 8 */
/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+}
+
+/**
* queued_spin_lock_slowpath - acquire the queued spinlock
* @lock: Pointer to queued spinlock structure
* @val: Current value of the queued spinlock 32-bit word
@@ -329,8 +355,14 @@ queue:
* go away.
*
* *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ *
*/
- while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
+ while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
cpu_relax();
/*
@@ -338,15 +370,19 @@ queue:
*
* n,0,0 -> 0,0,1 : lock, uncontended
* *,0,0 -> *,0,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail),
+ * clear the tail code and grab the lock. Otherwise, we only need
+ * to grab the lock.
*/
for (;;) {
- new = _Q_LOCKED_VAL;
- if (val != tail)
- new |= val;
-
- old = atomic_cmpxchg(&lock->val, val, new);
- if (old == val)
+ if (val != tail) {
+ set_locked(lock);
break;
+ }
+ old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ if (old == val)
+ goto release; /* No contention */
val = old;
}
@@ -354,12 +390,10 @@ queue:
/*
* contended path; wait for next, release.
*/
- if (new != _Q_LOCKED_VAL) {
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
- arch_mcs_spin_unlock_contended(&next->locked);
- }
+ arch_mcs_spin_unlock_contended(&next->locked);
release:
/*