// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Resilient Queued Spin Lock
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2013-2014,2018 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
 * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 */

#ifndef _GEN_PV_LOCK_SLOWPATH

#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
#include <trace/events/lock.h>
#include <asm/rqspinlock.h>

/*
 * Include queued spinlock definitions and statistics code
 */
#include "../locking/qspinlock.h"
#include "../locking/qspinlock_stat.h"

/*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
 * MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalable
 * Synchronization on Shared-Memory Multiprocessors" by Mellor-Crummey and
 * Scott) is available at
 *
 * https://bugzilla.kernel.org/show_bug.cgi?id=206115
 *
 * This queued spinlock implementation is based on the MCS lock; however, to
 * make it fit the 4 bytes we assume spinlock_t to be, and to preserve its
 * existing API, we must modify it somehow.
 *
 * In particular, where the traditional MCS lock consists of a tail pointer
 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
 * unlock the next pending waiter (next->locked), we compress both of these,
 * {tail, next->locked}, into a single u32 value.
 *
 * Since a spinlock disables recursion of its own context and there is a limit
 * to the contexts that can nest (task, softirq, hardirq, nmi), there are at
 * most 4 nesting levels, which can be encoded in a 2-bit number. Now we can
 * encode the tail by combining the 2-bit nesting level with the cpu number.
 * With one byte for the lock value and 3 bytes for the tail, only a 32-bit
 * word is now needed. Even though we only need 1 bit for the lock, we extend
 * it to a full byte to achieve better performance for architectures that
 * support atomic byte writes.
 *
 * We also change the first spinner to spin on the lock bit instead of its
 * node; thereby avoiding the need to carry a node from lock to unlock, and
 * preserving the existing lock API. This also makes the unlock code simpler
 * and faster.
 *
 * N.B. The current implementation only supports architectures that allow
 *      atomic operations on smaller 8-bit and 16-bit data types.
 *
 */
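
/*
 * Illustrative sketch (not part of the original file): one plausible way the
 * (lock, pending, tail) fields described above could be packed into a single
 * u32, and how a tail value might be composed from a cpu number and nesting
 * index. The authoritative layout and helpers (encode_tail(), decode_tail(),
 * the _Q_* masks) live in ../locking/qspinlock.h; the constants and helper
 * below are assumptions for exposition only.
 *
 *	#define EX_LOCKED_OFFSET	0	// one byte for the lock value
 *	#define EX_PENDING_OFFSET	8	// one byte for the pending bit
 *	#define EX_TAIL_IDX_OFFSET	16	// 2 bits of nesting level
 *	#define EX_TAIL_CPU_OFFSET	18	// remaining bits: cpu number + 1
 *
 *	static inline u32 ex_encode_tail(int cpu, int idx)
 *	{
 *		// cpu is stored off by one so that tail == 0 means "no tail"
 *		return ((cpu + 1) << EX_TAIL_CPU_OFFSET) |
 *		       (idx << EX_TAIL_IDX_OFFSET);
 *	}
 */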

#include "../locking/mcs_spinlock.h"

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
 * contexts: task, softirq, hardirq, nmi.
 *
 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
 *
 * PV doubles the storage and uses the second cacheline for PV state.
 */
static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
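
/*
 * Illustrative sketch (not part of the original file): how the per-CPU node
 * array above is typically used. Each context entering the slowpath claims
 * the next free node for its CPU by bumping the count kept in node 0, so
 * nested contexts (task -> softirq -> hardirq -> nmi) each get their own
 * slot. The names mirror the real code further down in this file; the
 * snippet is a simplified assumption, not a drop-in replacement.
 *
 *	struct mcs_spinlock *node = this_cpu_ptr(&rqnodes[0].mcs);
 *	int idx = node->count++;		// nesting level on this CPU
 *	u32 tail = encode_tail(smp_processor_id(), idx);
 *	node = grab_mcs_node(node, idx);	// roughly &rqnodes[idx].mcs
 */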

/*
 * Generate the native code for resilient_queued_spin_lock_slowpath(); provide
 * NOPs for all the PV callbacks.
 */

static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
					   struct mcs_spinlock *prev) { }
static __always_inline void __pv_kick_node(struct qspinlock *lock,
					   struct mcs_spinlock *node) { }
static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
						   struct mcs_spinlock *node)
						   { return 0; }

#define pv_enabled()		false

#define pv_init_node		__pv_init_node
#define pv_wait_node		__pv_wait_node
#define pv_kick_node		__pv_kick_node
#define pv_wait_head_or_lock	__pv_wait_head_or_lock

#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define resilient_queued_spin_lock_slowpath	native_resilient_queued_spin_lock_slowpath
#endif

#endif /* _GEN_PV_LOCK_SLOWPATH */

/**
 * resilient_queued_spin_lock_slowpath - acquire the queued spinlock
 * @lock: Pointer to queued spinlock structure
 * @val: Current value of the queued spinlock 32-bit word
 *
 * (queue tail, pending bit, lock value)
 *
 *              fast     :    slow                                  :    unlock
 *                       :                                          :
 * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
 *                       :       | ^--------.------.             /  :
 *                       :       v           \      \            |  :
 * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
 *                       :       | ^--'              |           |  :
 *                       :       v                   |           |  :
 * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
 *   queue               :       | ^--'                          |  :
 *                       :       v                               |  :
 * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
 *   queue               :         ^--'                             :
 */
void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
{
	struct mcs_spinlock *prev, *next, *node;
	u32 old, tail;
	int idx;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	if (pv_enabled())
		goto pv_queue;

	if (virt_spin_lock(lock))
		return;

	/*
	 * Wait for in-progress pending->locked hand-overs with a bounded
	 * number of spins so that we guarantee forward progress.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	if (val == _Q_PENDING_VAL) {
		int cnt = _Q_PENDING_LOOPS;
		val = atomic_cond_read_relaxed(&lock->val,
					       (VAL != _Q_PENDING_VAL) || !cnt--);
	}
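
	/*
	 * Illustrative sketch (not part of the original file): on
	 * architectures without a specialised implementation, the bounded
	 * wait above expands roughly as below, i.e. it rechecks the lock
	 * word until either the pending owner has taken the lock (value no
	 * longer equals _Q_PENDING_VAL) or the budget of _Q_PENDING_LOOPS
	 * iterations is exhausted. This is a simplified assumption about the
	 * generic smp_cond_load_relaxed() expansion, not a literal one.
	 *
	 *	for (;;) {
	 *		val = atomic_read(&lock->val);
	 *		if (val != _Q_PENDING_VAL || !cnt--)
	 *			break;
	 *		cpu_relax();
	 *	}
	 */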

	/*
	 * If we observe any contention; queue.
	 */
	if (val & ~_Q_LOCKED_MASK)
		goto queue;

	/*
	 * trylock || pending
	 *
	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
	 */
	val = queued_fetch_set_pending_acquire(lock);

	/*
	 * If we observe contention, there is a concurrent locker.
	 *
	 * Undo and queue; our setting of PENDING might have made the
	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
	 * on @next to become !NULL.
	 */
	if (unlikely(val & ~_Q_LOCKED_MASK)) {

		/* Undo PENDING if we set it. */
		if (!(val & _Q_PENDING_MASK))
			clear_pending(lock);

		goto queue;
	}

	/*
	 * We're pending, wait for the owner to go away.
	 *
	 * 0,1,1 -> *,1,0
	 *
	 * this wait loop must be a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because not all
	 * clear_pending_set_locked() implementations imply full
	 * barriers.
	 */
	if (val & _Q_LOCKED_MASK)
		smp_cond_load_acquire(&lock->locked, !VAL);

	/*
	 * take ownership and clear the pending bit.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	clear_pending_set_locked(lock);
	lockevent_inc(lock_pending);
	return;

	/*
	 * End of pending bit optimistic spinning and beginning of MCS
	 * queuing.
	 */
queue:
	lockevent_inc(lock_slowpath);
pv_queue:
	node = this_cpu_ptr(&rqnodes[0].mcs);
	idx = node->count++;
	tail = encode_tail(smp_processor_id(), idx);

	trace_contention_begin(lock, LCB_F_SPIN);

	/*
	 * 4 nodes are allocated based on the assumption that there will
	 * not be nested NMIs taking spinlocks. That may not be true in
	 * some architectures even though the chance of needing more than
	 * 4 nodes will still be extremely unlikely. When that happens,
	 * we fall back to spinning on the lock directly without using
	 * any MCS node. This is not the most elegant solution, but is
	 * simple enough.
	 */
	if (unlikely(idx >= _Q_MAX_NODES)) {
		lockevent_inc(lock_no_node);
		while (!queued_spin_trylock(lock))
			cpu_relax();
		goto release;
	}

	node = grab_mcs_node(node, idx);

	/*
	 * Keep counts of non-zero index values:
	 */
	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);

	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();

	node->locked = 0;
	node->next = NULL;
	pv_init_node(node);

	/*
	 * We touched a (possibly) cold cacheline in the per-cpu queue node;
	 * attempt the trylock once more in the hope someone let go while we
	 * weren't watching.
	 */
	if (queued_spin_trylock(lock))
		goto release;

	/*
	 * Ensure that the initialisation of @node is complete before we
	 * publish the updated tail via xchg_tail() and potentially link
	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
	 */
	smp_wmb();

	/*
	 * Publish the updated tail.
	 * We have already touched the queueing cacheline; don't bother with
	 * pending stuff.
	 *
	 * p,*,* -> n,*,*
	 */
	old = xchg_tail(lock, tail);
	next = NULL;

	/*
	 * if there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_MASK) {
		prev = decode_tail(old, rqnodes);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		pv_wait_node(node, prev);
		arch_mcs_spin_lock_contended(&node->locked);

		/*
		 * While waiting for the MCS lock, the next pointer may have
		 * been set by another lock waiter. We optimistically load
		 * the next pointer & prefetch the cacheline for writing
		 * to reduce latency in the upcoming MCS unlock operation.
		 */
		next = READ_ONCE(node->next);
		if (next)
			prefetchw(next);
	}

	/*
	 * we're at the head of the waitqueue, wait for the owner & pending to
	 * go away.
	 *
	 * *,x,y -> *,0,0
	 *
	 * this wait loop must use a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because the set_locked() function below
	 * does not imply a full barrier.
	 *
	 * The PV pv_wait_head_or_lock function, if active, will acquire
	 * the lock and return a non-zero value. So we have to skip the
	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
	 * been designated yet, there is no way for the locked value to become
	 * _Q_SLOW_VAL. So both the set_locked() and the
	 * atomic_cmpxchg_relaxed() calls will be safe.
	 *
	 * If PV isn't active, 0 will be returned instead.
	 *
	 */
	if ((val = pv_wait_head_or_lock(lock, node)))
		goto locked;

	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));

locked:
	/*
	 * claim the lock:
	 *
	 * n,0,0 -> 0,0,1 : lock, uncontended
	 * *,*,0 -> *,*,1 : lock, contended
	 *
	 * If the queue head is the only one in the queue (lock value == tail)
	 * and nobody is pending, clear the tail code and grab the lock.
	 * Otherwise, we only need to grab the lock.
	 */

	/*
	 * In the PV case we might already have _Q_LOCKED_VAL set, because
	 * of lock stealing; therefore we must also allow:
	 *
	 * n,0,1 -> 0,0,1
	 *
	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
	 *       above wait condition, therefore any concurrent setting of
	 *       PENDING will make the uncontended transition fail.
	 */
	if ((val & _Q_TAIL_MASK) == tail) {
		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
			goto release; /* No contention */
	}

	/*
	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
	 * which will then detect the remaining tail and queue behind us
	 * ensuring we'll see a @next.
	 */
	set_locked(lock);

	/*
	 * contended path; wait for next if not observed yet, release.
	 */
	if (!next)
		next = smp_cond_load_relaxed(&node->next, (VAL));

	arch_mcs_spin_unlock_contended(&next->locked);
	pv_kick_node(lock, next);

release:
	trace_contention_end(lock, 0);

	/*
	 * release the node
	 */
	__this_cpu_dec(rqnodes[0].mcs.count);
}
EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);
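
/*
 * Illustrative sketch (not part of the original file): how a fast path
 * typically hands off to the slowpath exported above. The wrapper name and
 * its exact location are assumptions for exposition; only the cmpxchg-based
 * fast-path pattern and the slowpath entry point mirror the real code.
 *
 *	static __always_inline void example_res_spin_lock(rqspinlock_t *lock)
 *	{
 *		u32 val = 0;
 *
 *		// Uncontended case: 0,0,0 -> 0,0,1 in a single atomic op.
 *		if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val,
 *						      _Q_LOCKED_VAL)))
 *			return;
 *
 *		// Any contention: fall back to the queued slowpath.
 *		resilient_queued_spin_lock_slowpath(lock, val);
 *	}
 */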

/*
 * Generate the paravirt code for resilient_queued_spin_lock_slowpath().
 */
#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#define _GEN_PV_LOCK_SLOWPATH

#undef  pv_enabled
#define pv_enabled()	true

#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
#undef pv_wait_head_or_lock

#undef  resilient_queued_spin_lock_slowpath
#define resilient_queued_spin_lock_slowpath	__pv_resilient_queued_spin_lock_slowpath

#include "../locking/qspinlock_paravirt.h"
#include "rqspinlock.c"

bool nopvspin;
static __init int parse_nopvspin(char *arg)
{
	nopvspin = true;
	return 0;
}
early_param("nopvspin", parse_nopvspin);
#endif