xref: /linux/arch/powerpc/lib/qspinlock.c (revision 07fdad3a93756b872da7b53647715c48d0f4a2d0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 #include <linux/bug.h>
3 #include <linux/compiler.h>
4 #include <linux/export.h>
5 #include <linux/percpu.h>
6 #include <linux/processor.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/sched/clock.h>
10 #include <asm/qspinlock.h>
11 #include <asm/paravirt.h>
12 #include <trace/events/lock.h>
13 
14 #define MAX_NODES	4
15 
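/*
 * Per-CPU MCS queue nodes. Each CPU can nest lock slow paths from a few
 * different contexts (e.g. task, soft IRQ, hard IRQ), hence MAX_NODES
 * entries, mirroring the generic qspinlock design. If the nodes are ever
 * exhausted, the slow path falls back to trylock spinning (see
 * queued_spin_lock_mcs_queue()).
 */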
16 struct qnode {
17 	struct qnode	*next;
18 	struct qspinlock *lock;
19 	int		cpu;
20 	u8		sleepy; /* 1 if the previous vCPU was preempted or
21 				 * if the previous node was sleepy */
22 	u8		locked; /* 1 if lock acquired */
23 };
24 
25 struct qnodes {
26 	int		count;
27 	struct qnode nodes[MAX_NODES];
28 };
29 
30 /* Tuning parameters */
31 static int steal_spins __read_mostly = (1 << 5);
32 static int remote_steal_spins __read_mostly = (1 << 2);
33 #if _Q_SPIN_TRY_LOCK_STEAL == 1
34 static const bool maybe_stealers = true;
35 #else
36 static bool maybe_stealers __read_mostly = true;
37 #endif
38 static int head_spins __read_mostly = (1 << 8);
39 
40 static bool pv_yield_owner __read_mostly = true;
41 static bool pv_yield_allow_steal __read_mostly = false;
42 static bool pv_spin_on_preempted_owner __read_mostly = false;
43 static bool pv_sleepy_lock __read_mostly = true;
44 static bool pv_sleepy_lock_sticky __read_mostly = false;
45 static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
46 static int pv_sleepy_lock_factor __read_mostly = 256;
47 static bool pv_yield_prev __read_mostly = true;
48 static bool pv_yield_sleepy_owner __read_mostly = true;
49 static bool pv_prod_head __read_mostly = false;
50 
51 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
52 static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
53 
54 #if _Q_SPIN_SPEC_BARRIER == 1
55 #define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
56 #else
57 #define spec_barrier() do { } while (0)
58 #endif
59 
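/*
 * Returns true if this CPU has seen a preempted lock owner or waiter
 * within the last pv_sleepy_lock_interval_ns (per-CPU timestamp written
 * by the seen_sleepy_*() helpers below). Used to scale spin counts while
 * vCPU preemption is being observed.
 */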
60 static __always_inline bool recently_sleepy(void)
61 {
62 	/* pv_sleepy_lock is true when this is called */
63 	if (pv_sleepy_lock_interval_ns) {
64 		u64 seen = this_cpu_read(sleepy_lock_seen_clock);
65 
66 		if (seen) {
67 			u64 delta = sched_clock() - seen;
68 			if (delta < pv_sleepy_lock_interval_ns)
69 				return true;
70 			this_cpu_write(sleepy_lock_seen_clock, 0);
71 		}
72 	}
73 
74 	return false;
75 }
76 
77 static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
78 {
79 	if (paravirt && sleepy)
80 		return steal_spins * pv_sleepy_lock_factor;
81 	else
82 		return steal_spins;
83 }
84 
85 static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
86 {
87 	if (paravirt && sleepy)
88 		return remote_steal_spins * pv_sleepy_lock_factor;
89 	else
90 		return remote_steal_spins;
91 }
92 
93 static __always_inline int get_head_spins(bool paravirt, bool sleepy)
94 {
95 	if (paravirt && sleepy)
96 		return head_spins * pv_sleepy_lock_factor;
97 	else
98 		return head_spins;
99 }
100 
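/*
 * The tail field encodes "CPU number + 1" so that a value of 0 means no
 * CPU is queued; decode_tail_cpu() reverses the transformation.
 */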
101 static inline u32 encode_tail_cpu(int cpu)
102 {
103 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
104 }
105 
106 static inline int decode_tail_cpu(u32 val)
107 {
108 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
109 }
110 
111 static inline int get_owner_cpu(u32 val)
112 {
113 	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
114 }
115 
116 /*
117  * Try to acquire the lock if it was not already locked. If the tail matches
118  * mytail then clear it, otherwise leave it unchanged. Return previous value.
119  *
120  * This is used by the head of the queue to acquire the lock and clean up
121  * its tail if it was the last one queued.
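 *
 * Roughly equivalent C for the inline asm below (an illustrative sketch;
 * the real thing is a single larx/stcx. loop with an acquire barrier on
 * success):
 *
 *	prev = lock->val;
 *	if (prev & _Q_LOCKED_VAL)
 *		return prev;				// already locked: fail
 *	new = (prev & _Q_TAIL_CPU_MASK) | newval;	// take the lock, keep tail
 *	if ((prev & _Q_TAIL_CPU_MASK) == tail)
 *		new &= ~_Q_TAIL_CPU_MASK;		// we were last queued: clear tail
 *	lock->val = new;
 *	return prev;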
122  */
123 static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
124 {
125 	u32 newval = queued_spin_encode_locked_val();
126 	u32 prev, tmp;
127 
128 	asm volatile(
129 "1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
130 	/* This test is necessary if there could be stealers */
131 "	andi.	%1,%0,%5						\n"
132 "	bne	3f							\n"
133 	/* Test whether the lock tail == mytail */
134 "	and	%1,%0,%6						\n"
135 "	cmpw	0,%1,%3							\n"
136 	/* Merge the new locked value */
137 "	or	%1,%1,%4						\n"
138 "	bne	2f							\n"
139 	/* If the lock tail matched, then clear it, otherwise leave it. */
140 "	andc	%1,%1,%6						\n"
141 "2:	stwcx.	%1,0,%2							\n"
142 "	bne-	1b							\n"
143 "\t"	PPC_ACQUIRE_BARRIER "						\n"
144 "3:									\n"
145 	: "=&r" (prev), "=&r" (tmp)
146 	: "r" (&lock->val), "r"(tail), "r" (newval),
147 	  "i" (_Q_LOCKED_VAL),
148 	  "r" (_Q_TAIL_CPU_MASK),
149 	  "i" (_Q_SPIN_EH_HINT)
150 	: "cr0", "memory");
151 
152 	return prev;
153 }
154 
155 /*
156  * Publish our tail, replacing previous tail. Return previous value.
157  *
158  * This provides a release barrier for publishing node, this pairs with the
159  * acquire barrier in get_tail_qnode() when the next CPU finds this tail
160  * value.
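 *
 * Roughly, this atomically performs
 *	prev = lock->val;
 *	lock->val = (prev & ~_Q_TAIL_CPU_MASK) | tail;
 *	return prev;
 * preceded by a release barrier (and a kcsan_release() annotation).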
161  */
162 static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
163 {
164 	u32 prev, tmp;
165 
166 	kcsan_release();
167 
168 	asm volatile(
169 "\t"	PPC_RELEASE_BARRIER "						\n"
170 "1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
171 "	andc	%1,%0,%4						\n"
172 "	or	%1,%1,%3						\n"
173 "	stwcx.	%1,0,%2							\n"
174 "	bne-	1b							\n"
175 	: "=&r" (prev), "=&r"(tmp)
176 	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
177 	: "cr0", "memory");
178 
179 	return prev;
180 }
181 
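/*
 * _Q_MUST_Q_VAL tells would-be stealers to queue up instead of stealing,
 * which bounds starvation of the queue head. The head sets it after
 * spinning for get_head_spins() iterations, and may temporarily clear it
 * while yielding to a preempted owner if pv_yield_allow_steal is enabled.
 */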
182 static __always_inline u32 set_mustq(struct qspinlock *lock)
183 {
184 	u32 prev;
185 
186 	asm volatile(
187 "1:	lwarx	%0,0,%1		# set_mustq				\n"
188 "	or	%0,%0,%2						\n"
189 "	stwcx.	%0,0,%1							\n"
190 "	bne-	1b							\n"
191 	: "=&r" (prev)
192 	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
193 	: "cr0", "memory");
194 
195 	return prev;
196 }
197 
198 static __always_inline u32 clear_mustq(struct qspinlock *lock)
199 {
200 	u32 prev;
201 
202 	asm volatile(
203 "1:	lwarx	%0,0,%1		# clear_mustq				\n"
204 "	andc	%0,%0,%2						\n"
205 "	stwcx.	%0,0,%1							\n"
206 "	bne-	1b							\n"
207 	: "=&r" (prev)
208 	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
209 	: "cr0", "memory");
210 
211 	return prev;
212 }
213 
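/*
 * _Q_SLEEPY_VAL marks a lock whose holder was recently seen preempted, so
 * that other waiters scale their spin counts by pv_sleepy_lock_factor
 * (see get_steal_spins() and friends). try_set_sleepy() sets the bit with
 * a cmpxchg-style loop, and only if the lock word still matches @old.
 */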
214 static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
215 {
216 	u32 prev;
217 	u32 new = old | _Q_SLEEPY_VAL;
218 
219 	BUG_ON(!(old & _Q_LOCKED_VAL));
220 	BUG_ON(old & _Q_SLEEPY_VAL);
221 
222 	asm volatile(
223 "1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
224 "	cmpw	0,%0,%2							\n"
225 "	bne-	2f							\n"
226 "	stwcx.	%3,0,%1							\n"
227 "	bne-	1b							\n"
228 "2:									\n"
229 	: "=&r" (prev)
230 	: "r" (&lock->val), "r"(old), "r" (new)
231 	: "cr0", "memory");
232 
233 	return likely(prev == old);
234 }
235 
236 static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
237 {
238 	if (pv_sleepy_lock) {
239 		if (pv_sleepy_lock_interval_ns)
240 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
241 		if (!(val & _Q_SLEEPY_VAL))
242 			try_set_sleepy(lock, val);
243 	}
244 }
245 
246 static __always_inline void seen_sleepy_lock(void)
247 {
248 	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
249 		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
250 }
251 
252 static __always_inline void seen_sleepy_node(void)
253 {
254 	if (pv_sleepy_lock) {
255 		if (pv_sleepy_lock_interval_ns)
256 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
257 		/* Don't set sleepy because we likely have a stale val */
258 	}
259 }
260 
261 static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
262 {
263 	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
264 	int idx;
265 
266 	/*
267 	 * After publishing the new tail and finding a previous tail in the
268 	 * previous val (which is the control dependency), this barrier
269 	 * orders the release barrier in publish_tail_cpu performed by the
270 	 * last CPU, with subsequently looking at its qnode structures
271 	 * after the barrier.
272 	 */
273 	smp_acquire__after_ctrl_dep();
274 
275 	for (idx = 0; idx < MAX_NODES; idx++) {
276 		struct qnode *qnode = &qnodesp->nodes[idx];
277 		if (qnode->lock == lock)
278 			return qnode;
279 	}
280 
281 	BUG();
282 }
283 
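/*
 * Paravirt yield heuristic: yield_count_of() samples the target vCPU's
 * yield/dispatch count, whose parity indicates whether it is running
 * (even) or preempted (odd), and yield_to_preempted() then directs our
 * time slice to that vCPU (an H_CONFER-style hypercall on pseries).
 */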
284 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
285 static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
286 {
287 	int owner;
288 	u32 yield_count;
289 	bool preempted = false;
290 
291 	BUG_ON(!(val & _Q_LOCKED_VAL));
292 
293 	if (!paravirt)
294 		goto relax;
295 
296 	if (!pv_yield_owner)
297 		goto relax;
298 
299 	owner = get_owner_cpu(val);
300 	yield_count = yield_count_of(owner);
301 
302 	if ((yield_count & 1) == 0)
303 		goto relax; /* owner vcpu is running */
304 
305 	spin_end();
306 
307 	seen_sleepy_owner(lock, val);
308 	preempted = true;
309 
310 	/*
311 	 * Read the lock word after sampling the yield count. On the other side
312 	 * there may not be a pairing wmb, because the yield count is updated by
313 	 * hypervisor preemption and the lock value by the OS; however, this
314 	 * ordering might reduce the chance of out of order accesses and
315 	 * improve the heuristic.
316 	 */
317 	smp_rmb();
318 
319 	if (READ_ONCE(lock->val) == val) {
320 		if (mustq)
321 			clear_mustq(lock);
322 		yield_to_preempted(owner, yield_count);
323 		if (mustq)
324 			set_mustq(lock);
325 		spin_begin();
326 
327 		/* Don't relax if we yielded. Maybe we should? */
328 		return preempted;
329 	}
330 	spin_begin();
331 relax:
332 	spin_cpu_relax();
333 
334 	return preempted;
335 }
336 
337 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
338 static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
339 {
340 	return __yield_to_locked_owner(lock, val, paravirt, false);
341 }
342 
343 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
344 static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
345 {
346 	bool mustq = false;
347 
348 	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
349 		mustq = true;
350 
351 	return __yield_to_locked_owner(lock, val, paravirt, mustq);
352 }
353 
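/*
 * Pass the "sleepy" hint down the queue: if the lock owner is currently
 * preempted, mark our successor's node sleepy so it also scales its spin
 * counts. Propagation stops on its own once owner preemption is no longer
 * observed, even if the lock stays contended.
 */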
354 static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
355 {
356 	struct qnode *next;
357 	int owner;
358 
359 	if (!paravirt)
360 		return;
361 	if (!pv_yield_sleepy_owner)
362 		return;
363 
364 	next = READ_ONCE(node->next);
365 	if (!next)
366 		return;
367 
368 	if (next->sleepy)
369 		return;
370 
371 	owner = get_owner_cpu(val);
372 	if (vcpu_is_preempted(owner))
373 		next->sleepy = 1;
374 }
375 
376 /* Called inside spin_begin() */
377 static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
378 {
379 	u32 yield_count;
380 	bool preempted = false;
381 
382 	if (!paravirt)
383 		goto relax;
384 
385 	if (!pv_yield_sleepy_owner)
386 		goto yield_prev;
387 
388 	/*
389 	 * If the previous waiter was preempted it might not be able to
390 	 * propagate sleepy to us, so check the lock in that case too.
391 	 */
392 	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
393 		u32 val = READ_ONCE(lock->val);
394 
395 		if (val & _Q_LOCKED_VAL) {
396 			if (node->next && !node->next->sleepy) {
397 				/*
398 				 * Propagate sleepy to next waiter. Only if
399 				 * owner is preempted, which allows the queue
400 				 * to become "non-sleepy" if vCPU preemption
401 				 * ceases to occur, even if the lock remains
402 				 * highly contended.
403 				 */
404 				if (vcpu_is_preempted(get_owner_cpu(val)))
405 					node->next->sleepy = 1;
406 			}
407 
408 			preempted = yield_to_locked_owner(lock, val, paravirt);
409 			if (preempted)
410 				return preempted;
411 		}
412 		node->sleepy = false;
413 	}
414 
415 yield_prev:
416 	if (!pv_yield_prev)
417 		goto relax;
418 
419 	yield_count = yield_count_of(prev_cpu);
420 	if ((yield_count & 1) == 0)
421 		goto relax; /* prev vcpu is running */
422 
423 	spin_end();
424 
425 	preempted = true;
426 	seen_sleepy_node();
427 
428 	smp_rmb(); /* See __yield_to_locked_owner comment */
429 
430 	if (!READ_ONCE(node->locked)) {
431 		yield_to_preempted(prev_cpu, yield_count);
432 		spin_begin();
433 		return preempted;
434 	}
435 	spin_begin();
436 
437 relax:
438 	spin_cpu_relax();
439 
440 	return preempted;
441 }
442 
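/*
 * Decide when a stealer should give up and queue: after steal_spins
 * iterations, or after the (typically smaller) remote_steal_spins count
 * when the current owner sits on a different NUMA node.
 */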
443 static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
444 {
445 	if (iters >= get_steal_spins(paravirt, sleepy))
446 		return true;
447 
448 	if (IS_ENABLED(CONFIG_NUMA) &&
449 	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
450 		int cpu = get_owner_cpu(val);
451 		if (numa_node_id() != cpu_to_node(cpu))
452 			return true;
453 	}
454 	return false;
455 }
456 
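/* Spin attempting to take the lock without queueing (lock stealing). */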
457 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
458 {
459 	bool seen_preempted = false;
460 	bool sleepy = false;
461 	int iters = 0;
462 	u32 val;
463 
464 	if (!steal_spins) {
465 		/* XXX: should spin_on_preempted_owner do anything here? */
466 		return false;
467 	}
468 
469 	/* Attempt to steal the lock */
470 	spin_begin();
471 	do {
472 		bool preempted = false;
473 
474 		val = READ_ONCE(lock->val);
475 		if (val & _Q_MUST_Q_VAL)
476 			break;
477 		spec_barrier();
478 
479 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
480 			spin_end();
481 			if (__queued_spin_trylock_steal(lock))
482 				return true;
483 			spin_begin();
484 		} else {
485 			preempted = yield_to_locked_owner(lock, val, paravirt);
486 		}
487 
488 		if (paravirt && pv_sleepy_lock) {
489 			if (!sleepy) {
490 				if (val & _Q_SLEEPY_VAL) {
491 					seen_sleepy_lock();
492 					sleepy = true;
493 				} else if (recently_sleepy()) {
494 					sleepy = true;
495 				}
496 			}
497 			if (pv_sleepy_lock_sticky && seen_preempted &&
498 			    !(val & _Q_SLEEPY_VAL)) {
499 				if (try_set_sleepy(lock, val))
500 					val |= _Q_SLEEPY_VAL;
501 			}
502 		}
503 
504 		if (preempted) {
505 			seen_preempted = true;
506 			sleepy = true;
507 			if (!pv_spin_on_preempted_owner)
508 				iters++;
509 			/*
510 			 * With pv_spin_on_preempted_owner, don't increase iters
511 			 * while the owner is preempted -- we won't interfere
512 			 * with it by definition. This could introduce a
513 			 * latency issue if we continually observe preempted
514 			 * owners, but hopefully that's a rare corner case of
515 			 * a badly oversubscribed system.
516 			 */
517 		} else {
518 			iters++;
519 		}
520 	} while (!steal_break(val, iters, paravirt, sleepy));
521 
522 	spin_end();
523 
524 	return false;
525 }
526 
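/*
 * MCS queueing slow path: grab a per-CPU qnode, publish it as the new
 * tail, link behind any previous tail and spin on our own node->locked,
 * then (as queue head) spin on the lock word itself, take the lock with
 * trylock_clean_tail(), and finally pass the MCS "lock" to the next waiter.
 */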
527 static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
528 {
529 	struct qnodes *qnodesp;
530 	struct qnode *next, *node;
531 	u32 val, old, tail;
532 	bool seen_preempted = false;
533 	bool sleepy = false;
534 	bool mustq = false;
535 	int idx;
536 	int iters = 0;
537 
538 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
539 
540 	qnodesp = this_cpu_ptr(&qnodes);
541 	if (unlikely(qnodesp->count >= MAX_NODES)) {
542 		spec_barrier();
543 		while (!queued_spin_trylock(lock))
544 			cpu_relax();
545 		return;
546 	}
547 
548 	idx = qnodesp->count++;
549 	/*
550 	 * Ensure that we increment qnodesp->count before initialising the
551 	 * actual node. If the compiler were to reorder these stores, an IRQ
552 	 * arriving in between could overwrite our assignments.
553 	 */
554 	barrier();
555 	node = &qnodesp->nodes[idx];
556 	node->next = NULL;
557 	node->lock = lock;
558 	node->cpu = smp_processor_id();
559 	node->sleepy = 0;
560 	node->locked = 0;
561 
562 	tail = encode_tail_cpu(node->cpu);
563 
564 	/*
565 	 * Assign all attributes of a node before it can be published.
566 	 * Issues an lwsync, serving as a release barrier, as well as a
567 	 * compiler barrier.
568 	 */
569 	old = publish_tail_cpu(lock, tail);
570 
571 	/*
572 	 * If there was a previous node; link it and wait until reaching the
573 	 * head of the waitqueue.
574 	 */
575 	if (old & _Q_TAIL_CPU_MASK) {
576 		int prev_cpu = decode_tail_cpu(old);
577 		struct qnode *prev = get_tail_qnode(lock, prev_cpu);
578 
579 		/* Link @node into the waitqueue. */
580 		WRITE_ONCE(prev->next, node);
581 
582 		/* Wait for mcs node lock to be released */
583 		spin_begin();
584 		while (!READ_ONCE(node->locked)) {
585 			spec_barrier();
586 
587 			if (yield_to_prev(lock, node, prev_cpu, paravirt))
588 				seen_preempted = true;
589 		}
590 		spec_barrier();
591 		spin_end();
592 
593 		smp_rmb(); /* acquire barrier for the mcs lock */
594 
595 		/*
596 		 * Generic qspinlocks have this prefetch here, but it seems
597 		 * like it could cause additional line transitions because
598 		 * the waiter will keep loading from it.
599 		 */
600 		if (_Q_SPIN_PREFETCH_NEXT) {
601 			next = READ_ONCE(node->next);
602 			if (next)
603 				prefetchw(next);
604 		}
605 	}
606 
607 	/* We're at the head of the waitqueue, wait for the lock. */
608 again:
609 	spin_begin();
610 	for (;;) {
611 		bool preempted;
612 
613 		val = READ_ONCE(lock->val);
614 		if (!(val & _Q_LOCKED_VAL))
615 			break;
616 		spec_barrier();
617 
618 		if (paravirt && pv_sleepy_lock && maybe_stealers) {
619 			if (!sleepy) {
620 				if (val & _Q_SLEEPY_VAL) {
621 					seen_sleepy_lock();
622 					sleepy = true;
623 				} else if (recently_sleepy()) {
624 					sleepy = true;
625 				}
626 			}
627 			if (pv_sleepy_lock_sticky && seen_preempted &&
628 			    !(val & _Q_SLEEPY_VAL)) {
629 				if (try_set_sleepy(lock, val))
630 					val |= _Q_SLEEPY_VAL;
631 			}
632 		}
633 
634 		propagate_sleepy(node, val, paravirt);
635 		preempted = yield_head_to_locked_owner(lock, val, paravirt);
636 		if (!maybe_stealers)
637 			continue;
638 
639 		if (preempted)
640 			seen_preempted = true;
641 
642 		if (paravirt && preempted) {
643 			sleepy = true;
644 
645 			if (!pv_spin_on_preempted_owner)
646 				iters++;
647 		} else {
648 			iters++;
649 		}
650 
651 		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
652 			mustq = true;
653 			set_mustq(lock);
654 			val |= _Q_MUST_Q_VAL;
655 		}
656 	}
657 	spec_barrier();
658 	spin_end();
659 
660 	/* If we're the last queued, must clean up the tail. */
661 	old = trylock_clean_tail(lock, tail);
662 	if (unlikely(old & _Q_LOCKED_VAL)) {
663 		BUG_ON(!maybe_stealers);
664 		goto again; /* Can only be true if maybe_stealers. */
665 	}
666 
667 	if ((old & _Q_TAIL_CPU_MASK) == tail)
668 		goto release; /* We were the tail, no next. */
669 
670 	/* There is a next, must wait for node->next != NULL (MCS protocol) */
671 	next = READ_ONCE(node->next);
672 	if (!next) {
673 		spin_begin();
674 		while (!(next = READ_ONCE(node->next)))
675 			cpu_relax();
676 		spin_end();
677 	}
678 	spec_barrier();
679 
680 	/*
681 	 * Unlock the next mcs waiter node. Release barrier is not required
682 	 * here because the acquirer is only accessing the lock word, and
683 	 * the acquire barrier used when taking the lock orders that update vs
684 	 * this store to locked. The corresponding barrier is the smp_rmb()
685 	 * acquire barrier for mcs lock, above.
686 	 */
687 	if (paravirt && pv_prod_head) {
688 		int next_cpu = next->cpu;
689 		WRITE_ONCE(next->locked, 1);
690 		if (_Q_SPIN_MISO)
691 			asm volatile("miso" ::: "memory");
692 		if (vcpu_is_preempted(next_cpu))
693 			prod_cpu(next_cpu);
694 	} else {
695 		WRITE_ONCE(next->locked, 1);
696 		if (_Q_SPIN_MISO)
697 			asm volatile("miso" ::: "memory");
698 	}
699 
700 release:
701 	/*
702 	 * Clear the lock before releasing the node, as another CPU might see stale
703 	 * values if an interrupt occurs after we increment qnodesp->count
704 	 * but before node->lock is initialized. The barrier ensures that
705 	 * there are no further stores to the node after it has been released.
706 	 */
707 	node->lock = NULL;
708 	barrier();
709 	qnodesp->count--;
710 }
711 
712 void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
713 {
714 	trace_contention_begin(lock, LCB_F_SPIN);
715 	/*
716 	 * This looks funny, but it induces the compiler to inline both
717 	 * sides of the branch rather than sharing code, as would happen if the
718 	 * runtime condition were simply passed down as the paravirt argument.
719 	 */
720 	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
721 		if (try_to_steal_lock(lock, true))
722 			spec_barrier();
723 		else
724 			queued_spin_lock_mcs_queue(lock, true);
725 	} else {
726 		if (try_to_steal_lock(lock, false))
727 			spec_barrier();
728 		else
729 			queued_spin_lock_mcs_queue(lock, false);
730 	}
731 	trace_contention_end(lock, 0);
732 }
733 EXPORT_SYMBOL(queued_spin_lock_slowpath);
734 
735 #ifdef CONFIG_PARAVIRT_SPINLOCKS
736 void pv_spinlocks_init(void)
737 {
738 }
739 #endif
740 
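/*
 * Runtime tunables for the parameters above, exposed as qspl_* files under
 * the powerpc arch debugfs directory (typically /sys/kernel/debug/powerpc).
 */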
741 #include <linux/debugfs.h>
742 static int steal_spins_set(void *data, u64 val)
743 {
744 #if _Q_SPIN_TRY_LOCK_STEAL == 1
745 	/* maybe_stealers remains compile-time true */
746 	steal_spins = val;
747 #else
748 	static DEFINE_MUTEX(lock);
749 
750 	/*
751 	 * The lock slow path has a !maybe_stealers case that can assume
752 	 * the head of queue will not see concurrent waiters. That waiter
753 	 * is unsafe in the presence of stealers, so must keep them away
754 	 * is unsafe in the presence of stealers, so the two must be kept
755 	 * away from one another.
756 
757 	mutex_lock(&lock);
758 	if (val && !steal_spins) {
759 		maybe_stealers = true;
760 		/* wait for queue head waiter to go away */
761 		synchronize_rcu();
762 		steal_spins = val;
763 	} else if (!val && steal_spins) {
764 		steal_spins = val;
765 		/* wait for all possible stealers to go away */
766 		synchronize_rcu();
767 		maybe_stealers = false;
768 	} else {
769 		steal_spins = val;
770 	}
771 	mutex_unlock(&lock);
772 #endif
773 
774 	return 0;
775 }
776 
777 static int steal_spins_get(void *data, u64 *val)
778 {
779 	*val = steal_spins;
780 
781 	return 0;
782 }
783 
784 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
785 
786 static int remote_steal_spins_set(void *data, u64 val)
787 {
788 	remote_steal_spins = val;
789 
790 	return 0;
791 }
792 
793 static int remote_steal_spins_get(void *data, u64 *val)
794 {
795 	*val = remote_steal_spins;
796 
797 	return 0;
798 }
799 
800 DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
801 
802 static int head_spins_set(void *data, u64 val)
803 {
804 	head_spins = val;
805 
806 	return 0;
807 }
808 
809 static int head_spins_get(void *data, u64 *val)
810 {
811 	*val = head_spins;
812 
813 	return 0;
814 }
815 
816 DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
817 
818 static int pv_yield_owner_set(void *data, u64 val)
819 {
820 	pv_yield_owner = !!val;
821 
822 	return 0;
823 }
824 
825 static int pv_yield_owner_get(void *data, u64 *val)
826 {
827 	*val = pv_yield_owner;
828 
829 	return 0;
830 }
831 
832 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
833 
834 static int pv_yield_allow_steal_set(void *data, u64 val)
835 {
836 	pv_yield_allow_steal = !!val;
837 
838 	return 0;
839 }
840 
841 static int pv_yield_allow_steal_get(void *data, u64 *val)
842 {
843 	*val = pv_yield_allow_steal;
844 
845 	return 0;
846 }
847 
848 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
849 
850 static int pv_spin_on_preempted_owner_set(void *data, u64 val)
851 {
852 	pv_spin_on_preempted_owner = !!val;
853 
854 	return 0;
855 }
856 
857 static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
858 {
859 	*val = pv_spin_on_preempted_owner;
860 
861 	return 0;
862 }
863 
864 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
865 
866 static int pv_sleepy_lock_set(void *data, u64 val)
867 {
868 	pv_sleepy_lock = !!val;
869 
870 	return 0;
871 }
872 
873 static int pv_sleepy_lock_get(void *data, u64 *val)
874 {
875 	*val = pv_sleepy_lock;
876 
877 	return 0;
878 }
879 
880 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
881 
882 static int pv_sleepy_lock_sticky_set(void *data, u64 val)
883 {
884 	pv_sleepy_lock_sticky = !!val;
885 
886 	return 0;
887 }
888 
889 static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
890 {
891 	*val = pv_sleepy_lock_sticky;
892 
893 	return 0;
894 }
895 
896 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
897 
898 static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
899 {
900 	pv_sleepy_lock_interval_ns = val;
901 
902 	return 0;
903 }
904 
905 static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
906 {
907 	*val = pv_sleepy_lock_interval_ns;
908 
909 	return 0;
910 }
911 
912 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
913 
914 static int pv_sleepy_lock_factor_set(void *data, u64 val)
915 {
916 	pv_sleepy_lock_factor = val;
917 
918 	return 0;
919 }
920 
921 static int pv_sleepy_lock_factor_get(void *data, u64 *val)
922 {
923 	*val = pv_sleepy_lock_factor;
924 
925 	return 0;
926 }
927 
928 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
929 
930 static int pv_yield_prev_set(void *data, u64 val)
931 {
932 	pv_yield_prev = !!val;
933 
934 	return 0;
935 }
936 
937 static int pv_yield_prev_get(void *data, u64 *val)
938 {
939 	*val = pv_yield_prev;
940 
941 	return 0;
942 }
943 
944 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
945 
946 static int pv_yield_sleepy_owner_set(void *data, u64 val)
947 {
948 	pv_yield_sleepy_owner = !!val;
949 
950 	return 0;
951 }
952 
953 static int pv_yield_sleepy_owner_get(void *data, u64 *val)
954 {
955 	*val = pv_yield_sleepy_owner;
956 
957 	return 0;
958 }
959 
960 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
961 
962 static int pv_prod_head_set(void *data, u64 val)
963 {
964 	pv_prod_head = !!val;
965 
966 	return 0;
967 }
968 
969 static int pv_prod_head_get(void *data, u64 *val)
970 {
971 	*val = pv_prod_head;
972 
973 	return 0;
974 }
975 
976 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
977 
978 static __init int spinlock_debugfs_init(void)
979 {
980 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
981 	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
982 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
983 	if (is_shared_processor()) {
984 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
985 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
986 		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
987 		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
988 		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
989 		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
990 		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
991 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
992 		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
993 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
994 	}
995 
996 	return 0;
997 }
998 device_initcall(spinlock_debugfs_init);
999