xref: /linux/arch/powerpc/lib/qspinlock.c (revision 3d0fe49454652117522f60bfbefb978ba0e5300b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 #include <linux/bug.h>
3 #include <linux/compiler.h>
4 #include <linux/export.h>
5 #include <linux/percpu.h>
6 #include <linux/processor.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/sched/clock.h>
10 #include <asm/qspinlock.h>
11 #include <asm/paravirt.h>
12 
13 #define MAX_NODES	4
14 
15 struct qnode {
16 	struct qnode	*next;
17 	struct qspinlock *lock;
18 	int		cpu;
19 	u8		sleepy; /* 1 if the previous vCPU was preempted or
20 				 * if the previous node was sleepy */
21 	u8		locked; /* 1 if lock acquired */
22 };
23 
24 struct qnodes {
25 	int		count;
26 	struct qnode nodes[MAX_NODES];
27 };
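/*
 * The four per-CPU qnodes mirror the generic qspinlock design (an
 * assumption here, not spelled out in this file): roughly one node per
 * context that can nest and spin on a lock at once, i.e. task, soft IRQ,
 * hard IRQ and NMI. qnodes.count tracks how many are in use so the slow
 * path can fall back to a plain trylock loop if the nesting ever exceeds
 * MAX_NODES.
 */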
28 
29 /* Tuning parameters */
30 static int steal_spins __read_mostly = (1 << 5);
31 static int remote_steal_spins __read_mostly = (1 << 2);
32 #if _Q_SPIN_TRY_LOCK_STEAL == 1
33 static const bool maybe_stealers = true;
34 #else
35 static bool maybe_stealers __read_mostly = true;
36 #endif
37 static int head_spins __read_mostly = (1 << 8);
38 
39 static bool pv_yield_owner __read_mostly = true;
40 static bool pv_yield_allow_steal __read_mostly = false;
41 static bool pv_spin_on_preempted_owner __read_mostly = false;
42 static bool pv_sleepy_lock __read_mostly = true;
43 static bool pv_sleepy_lock_sticky __read_mostly = false;
44 static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
45 static int pv_sleepy_lock_factor __read_mostly = 256;
46 static bool pv_yield_prev __read_mostly = true;
47 static bool pv_yield_sleepy_owner __read_mostly = true;
48 static bool pv_prod_head __read_mostly = false;
49 
50 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
51 static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
52 
53 #if _Q_SPIN_SPEC_BARRIER == 1
54 #define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
55 #else
56 #define spec_barrier() do { } while (0)
57 #endif
58 
59 static __always_inline bool recently_sleepy(void)
60 {
61 	/* pv_sleepy_lock is true when this is called */
62 	if (pv_sleepy_lock_interval_ns) {
63 		u64 seen = this_cpu_read(sleepy_lock_seen_clock);
64 
65 		if (seen) {
66 			u64 delta = sched_clock() - seen;
67 			if (delta < pv_sleepy_lock_interval_ns)
68 				return true;
69 			this_cpu_write(sleepy_lock_seen_clock, 0);
70 		}
71 	}
72 
73 	return false;
74 }
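/*
 * Illustrative example with a hypothetical setting: pv_sleepy_lock_interval_ns
 * defaults to 0 (disabled), but if it were set to 1000000 via debugfs, a CPU
 * that stamped sleepy_lock_seen_clock within the last 1ms would keep treating
 * locks as sleepy; once the interval expires the stamp is cleared and the
 * normal spin budgets apply again.
 */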
75 
76 static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
77 {
78 	if (paravirt && sleepy)
79 		return steal_spins * pv_sleepy_lock_factor;
80 	else
81 		return steal_spins;
82 }
83 
84 static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
85 {
86 	if (paravirt && sleepy)
87 		return remote_steal_spins * pv_sleepy_lock_factor;
88 	else
89 		return remote_steal_spins;
90 }
91 
92 static __always_inline int get_head_spins(bool paravirt, bool sleepy)
93 {
94 	if (paravirt && sleepy)
95 		return head_spins * pv_sleepy_lock_factor;
96 	else
97 		return head_spins;
98 }
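/*
 * With the defaults above, a sleepy paravirt lock scales the spin budgets by
 * pv_sleepy_lock_factor: steal_spins goes from 32 to 32 * 256 = 8192 and
 * head_spins from 256 to 256 * 256 = 65536, so waiters are far more patient
 * once vCPU preemption has been observed.
 */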
99 
100 static inline u32 encode_tail_cpu(int cpu)
101 {
102 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
103 }
104 
105 static inline int decode_tail_cpu(u32 val)
106 {
107 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
108 }
109 
110 static inline int get_owner_cpu(u32 val)
111 {
112 	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
113 }
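/*
 * Worked example for the tail encoding (the offsets and masks come from the
 * lock word layout defined alongside _Q_TAIL_CPU_MASK): encode_tail_cpu(0)
 * is 1 << _Q_TAIL_CPU_OFFSET, so an all-zero tail field always means "no CPU
 * queued", and decode_tail_cpu(encode_tail_cpu(cpu)) == cpu for every valid
 * cpu. The +1/-1 bias exists only to reserve the zero encoding.
 */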
114 
115 /*
116  * Try to acquire the lock if it was not already locked. If the tail matches
117  * mytail then clear it, otherwise leave it unchanged. Return previous value.
118  *
119  * This is used by the head of the queue to acquire the lock and clean up
120  * its tail if it was the last one queued.
121  */
122 static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
123 {
124 	u32 newval = queued_spin_encode_locked_val();
125 	u32 prev, tmp;
126 
127 	asm volatile(
128 "1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
129 	/* This test is necessary if there could be stealers */
130 "	andi.	%1,%0,%5						\n"
131 "	bne	3f							\n"
132 	/* Test whether the lock tail == mytail */
133 "	and	%1,%0,%6						\n"
134 "	cmpw	0,%1,%3							\n"
135 	/* Merge the new locked value */
136 "	or	%1,%1,%4						\n"
137 "	bne	2f							\n"
138 	/* If the lock tail matched, then clear it, otherwise leave it. */
139 "	andc	%1,%1,%6						\n"
140 "2:	stwcx.	%1,0,%2							\n"
141 "	bne-	1b							\n"
142 "\t"	PPC_ACQUIRE_BARRIER "						\n"
143 "3:									\n"
144 	: "=&r" (prev), "=&r" (tmp)
145 	: "r" (&lock->val), "r"(tail), "r" (newval),
146 	  "i" (_Q_LOCKED_VAL),
147 	  "r" (_Q_TAIL_CPU_MASK),
148 	  "i" (_Q_SPIN_EH_HINT)
149 	: "cr0", "memory");
150 
151 	return prev;
152 }
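/*
 * Rough C equivalent of the asm above (illustrative only; the real code must
 * stay a single larx/stcx. sequence so the whole update is atomic):
 *
 *	old = lock->val;
 *	if (old & _Q_LOCKED_VAL)
 *		return old;			// a stealer got in first
 *	new = (old & _Q_TAIL_CPU_MASK) | newval;
 *	if ((old & _Q_TAIL_CPU_MASK) == tail)
 *		new &= ~_Q_TAIL_CPU_MASK;	// we were the last queued CPU
 *	lock->val = new;			// store with acquire semantics,
 *	return old;				// retried if the reservation is lost
 *
 * Because "new" is rebuilt from only the tail bits plus newval, any MUST_Q
 * or SLEEPY bits in the old value are cleared as a side effect of taking
 * the lock here.
 */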
153 
154 /*
155  * Publish our tail, replacing previous tail. Return previous value.
156  *
157  * This provides a release barrier for publishing node, this pairs with the
158  * acquire barrier in get_tail_qnode() when the next CPU finds this tail
159  * value.
160  */
161 static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
162 {
163 	u32 prev, tmp;
164 
165 	kcsan_release();
166 
167 	asm volatile(
168 "\t"	PPC_RELEASE_BARRIER "						\n"
169 "1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
170 "	andc	%1,%0,%4						\n"
171 "	or	%1,%1,%3						\n"
172 "	stwcx.	%1,0,%2							\n"
173 "	bne-	1b							\n"
174 	: "=&r" (prev), "=&r"(tmp)
175 	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
176 	: "cr0", "memory");
177 
178 	return prev;
179 }
180 
181 static __always_inline u32 set_mustq(struct qspinlock *lock)
182 {
183 	u32 prev;
184 
185 	asm volatile(
186 "1:	lwarx	%0,0,%1		# set_mustq				\n"
187 "	or	%0,%0,%2						\n"
188 "	stwcx.	%0,0,%1							\n"
189 "	bne-	1b							\n"
190 	: "=&r" (prev)
191 	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
192 	: "cr0", "memory");
193 
194 	return prev;
195 }
196 
197 static __always_inline u32 clear_mustq(struct qspinlock *lock)
198 {
199 	u32 prev;
200 
201 	asm volatile(
202 "1:	lwarx	%0,0,%1		# clear_mustq				\n"
203 "	andc	%0,%0,%2						\n"
204 "	stwcx.	%0,0,%1							\n"
205 "	bne-	1b							\n"
206 	: "=&r" (prev)
207 	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
208 	: "cr0", "memory");
209 
210 	return prev;
211 }
212 
213 static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
214 {
215 	u32 prev;
216 	u32 new = old | _Q_SLEEPY_VAL;
217 
218 	BUG_ON(!(old & _Q_LOCKED_VAL));
219 	BUG_ON(old & _Q_SLEEPY_VAL);
220 
221 	asm volatile(
222 "1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
223 "	cmpw	0,%0,%2							\n"
224 "	bne-	2f							\n"
225 "	stwcx.	%3,0,%1							\n"
226 "	bne-	1b							\n"
227 "2:									\n"
228 	: "=&r" (prev)
229 	: "r" (&lock->val), "r"(old), "r" (new)
230 	: "cr0", "memory");
231 
232 	return likely(prev == old);
233 }
234 
235 static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
236 {
237 	if (pv_sleepy_lock) {
238 		if (pv_sleepy_lock_interval_ns)
239 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
240 		if (!(val & _Q_SLEEPY_VAL))
241 			try_set_sleepy(lock, val);
242 	}
243 }
244 
245 static __always_inline void seen_sleepy_lock(void)
246 {
247 	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
248 		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
249 }
250 
251 static __always_inline void seen_sleepy_node(void)
252 {
253 	if (pv_sleepy_lock) {
254 		if (pv_sleepy_lock_interval_ns)
255 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
256 		/* Don't set sleepy because we likely have a stale val */
257 	}
258 }
259 
260 static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
261 {
262 	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
263 	int idx;
264 
265 	/*
266 	 * After publishing the new tail and finding a previous tail in the
267 	 * previous val (which is the control dependency), this barrier
268 	 * pairs with the release barrier in publish_tail_cpu performed by
269 	 * the previous CPU, ordering that release before this CPU's
270 	 * subsequent loads from the previous CPU's qnode structures.
271 	 */
272 	smp_acquire__after_ctrl_dep();
273 
274 	for (idx = 0; idx < MAX_NODES; idx++) {
275 		struct qnode *qnode = &qnodesp->nodes[idx];
276 		if (qnode->lock == lock)
277 			return qnode;
278 	}
279 
280 	BUG();
281 }
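/*
 * The linear scan above works because the previous CPU assigned qnode->lock
 * before publish_tail_cpu() made its tail visible, and the barrier pairing
 * documented above orders that assignment before our loads, so one of its
 * (at most MAX_NODES) slots must match this lock.
 */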
282 
283 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
284 static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
285 {
286 	int owner;
287 	u32 yield_count;
288 	bool preempted = false;
289 
290 	BUG_ON(!(val & _Q_LOCKED_VAL));
291 
292 	if (!paravirt)
293 		goto relax;
294 
295 	if (!pv_yield_owner)
296 		goto relax;
297 
298 	owner = get_owner_cpu(val);
299 	yield_count = yield_count_of(owner);
300 
301 	if ((yield_count & 1) == 0)
302 		goto relax; /* owner vcpu is running */
303 
304 	spin_end();
305 
306 	seen_sleepy_owner(lock, val);
307 	preempted = true;
308 
309 	/*
310 	 * Read the lock word after sampling the yield count. On the other side
311 	 * there may or may not be a wmb, because the yield count is updated
312 	 * by hypervisor preemption and the lock value by the OS; however this
313 	 * ordering might reduce the chance of out of order accesses and
314 	 * improve the heuristic.
315 	 */
316 	smp_rmb();
317 
318 	if (READ_ONCE(lock->val) == val) {
319 		if (mustq)
320 			clear_mustq(lock);
321 		yield_to_preempted(owner, yield_count);
322 		if (mustq)
323 			set_mustq(lock);
324 		spin_begin();
325 
326 		/* Don't relax if we yielded. Maybe we should? */
327 		return preempted;
328 	}
329 	spin_begin();
330 relax:
331 	spin_cpu_relax();
332 
333 	return preempted;
334 }
335 
336 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
337 static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
338 {
339 	return __yield_to_locked_owner(lock, val, paravirt, false);
340 }
341 
342 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
343 static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
344 {
345 	bool mustq = false;
346 
347 	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
348 		mustq = true;
349 
350 	return __yield_to_locked_owner(lock, val, paravirt, mustq);
351 }
352 
353 static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
354 {
355 	struct qnode *next;
356 	int owner;
357 
358 	if (!paravirt)
359 		return;
360 	if (!pv_yield_sleepy_owner)
361 		return;
362 
363 	next = READ_ONCE(node->next);
364 	if (!next)
365 		return;
366 
367 	if (next->sleepy)
368 		return;
369 
370 	owner = get_owner_cpu(val);
371 	if (vcpu_is_preempted(owner))
372 		next->sleepy = 1;
373 }
374 
375 /* Called inside spin_begin() */
376 static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
377 {
378 	u32 yield_count;
379 	bool preempted = false;
380 
381 	if (!paravirt)
382 		goto relax;
383 
384 	if (!pv_yield_sleepy_owner)
385 		goto yield_prev;
386 
387 	/*
388 	 * If the previous waiter was preempted it might not be able to
389 	 * propagate sleepy to us, so check the lock in that case too.
390 	 */
391 	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
392 		u32 val = READ_ONCE(lock->val);
393 
394 		if (val & _Q_LOCKED_VAL) {
395 			if (node->next && !node->next->sleepy) {
396 				/*
397 				 * Propagate sleepy to next waiter. Only if
398 				 * owner is preempted, which allows the queue
399 				 * to become "non-sleepy" if vCPU preemption
400 				 * ceases to occur, even if the lock remains
401 				 * highly contended.
402 				 */
403 				if (vcpu_is_preempted(get_owner_cpu(val)))
404 					node->next->sleepy = 1;
405 			}
406 
407 			preempted = yield_to_locked_owner(lock, val, paravirt);
408 			if (preempted)
409 				return preempted;
410 		}
411 		node->sleepy = false;
412 	}
413 
414 yield_prev:
415 	if (!pv_yield_prev)
416 		goto relax;
417 
418 	yield_count = yield_count_of(prev_cpu);
419 	if ((yield_count & 1) == 0)
420 		goto relax; /* prev vcpu is running */
421 
422 	spin_end();
423 
424 	preempted = true;
425 	seen_sleepy_node();
426 
427 	smp_rmb(); /* See __yield_to_locked_owner comment */
428 
429 	if (!READ_ONCE(node->locked)) {
430 		yield_to_preempted(prev_cpu, yield_count);
431 		spin_begin();
432 		return preempted;
433 	}
434 	spin_begin();
435 
436 relax:
437 	spin_cpu_relax();
438 
439 	return preempted;
440 }
441 
442 static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
443 {
444 	if (iters >= get_steal_spins(paravirt, sleepy))
445 		return true;
446 
447 	if (IS_ENABLED(CONFIG_NUMA) &&
448 	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
449 		int cpu = get_owner_cpu(val);
450 		if (numa_node_id() != cpu_to_node(cpu))
451 			return true;
452 	}
453 	return false;
454 }
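/*
 * With the defaults above (steal_spins = 32, remote_steal_spins = 4) and
 * CONFIG_NUMA enabled, a CPU gives up stealing after only a few iterations
 * once it observes the owner on a remote NUMA node, but keeps trying longer
 * against a local owner. Both limits are scaled up by pv_sleepy_lock_factor
 * while the lock is considered sleepy.
 */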
455 
456 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
457 {
458 	bool seen_preempted = false;
459 	bool sleepy = false;
460 	int iters = 0;
461 	u32 val;
462 
463 	if (!steal_spins) {
464 		/* XXX: should spin_on_preempted_owner do anything here? */
465 		return false;
466 	}
467 
468 	/* Attempt to steal the lock */
469 	spin_begin();
470 	do {
471 		bool preempted = false;
472 
473 		val = READ_ONCE(lock->val);
474 		if (val & _Q_MUST_Q_VAL)
475 			break;
476 		spec_barrier();
477 
478 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
479 			spin_end();
480 			if (__queued_spin_trylock_steal(lock))
481 				return true;
482 			spin_begin();
483 		} else {
484 			preempted = yield_to_locked_owner(lock, val, paravirt);
485 		}
486 
487 		if (paravirt && pv_sleepy_lock) {
488 			if (!sleepy) {
489 				if (val & _Q_SLEEPY_VAL) {
490 					seen_sleepy_lock();
491 					sleepy = true;
492 				} else if (recently_sleepy()) {
493 					sleepy = true;
494 				}
495 			}
496 			if (pv_sleepy_lock_sticky && seen_preempted &&
497 			    !(val & _Q_SLEEPY_VAL)) {
498 				if (try_set_sleepy(lock, val))
499 					val |= _Q_SLEEPY_VAL;
500 			}
501 		}
502 
503 		if (preempted) {
504 			seen_preempted = true;
505 			sleepy = true;
506 			if (!pv_spin_on_preempted_owner)
507 				iters++;
508 			/*
509 			 * When pv_spin_on_preempted_owner is set, don't increase
510 			 * iters while the owner is preempted, since by definition
511 			 * we won't interfere with it. This could introduce some
512 			 * latency if we continually observe preempted owners, but
513 			 * hopefully that's a rare corner case of a badly
514 			 * oversubscribed system.
515 			 */
516 		} else {
517 			iters++;
518 		}
519 	} while (!steal_break(val, iters, paravirt, sleepy));
520 
521 	spin_end();
522 
523 	return false;
524 }
525 
526 static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
527 {
528 	struct qnodes *qnodesp;
529 	struct qnode *next, *node;
530 	u32 val, old, tail;
531 	bool seen_preempted = false;
532 	bool sleepy = false;
533 	bool mustq = false;
534 	int idx;
535 	int iters = 0;
536 
537 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
538 
539 	qnodesp = this_cpu_ptr(&qnodes);
540 	if (unlikely(qnodesp->count >= MAX_NODES)) {
541 		spec_barrier();
542 		while (!queued_spin_trylock(lock))
543 			cpu_relax();
544 		return;
545 	}
546 
547 	idx = qnodesp->count++;
548 	/*
549 	 * Ensure that we increment the head node->count before initialising
550 	 * the actual node. If the compiler is kind enough to reorder these
551 	 * stores, then an IRQ could overwrite our assignments.
552 	 */
553 	barrier();
554 	node = &qnodesp->nodes[idx];
555 	node->next = NULL;
556 	node->lock = lock;
557 	node->cpu = smp_processor_id();
558 	node->sleepy = 0;
559 	node->locked = 0;
560 
561 	tail = encode_tail_cpu(node->cpu);
562 
563 	/*
564 	 * Assign all attributes of a node before it can be published.
565 	 * Issues an lwsync, serving as a release barrier, as well as a
566 	 * compiler barrier.
567 	 */
568 	old = publish_tail_cpu(lock, tail);
569 
570 	/*
571 	 * If there was a previous node; link it and wait until reaching the
572 	 * head of the waitqueue.
573 	 */
574 	if (old & _Q_TAIL_CPU_MASK) {
575 		int prev_cpu = decode_tail_cpu(old);
576 		struct qnode *prev = get_tail_qnode(lock, prev_cpu);
577 
578 		/* Link @node into the waitqueue. */
579 		WRITE_ONCE(prev->next, node);
580 
581 		/* Wait for mcs node lock to be released */
582 		spin_begin();
583 		while (!READ_ONCE(node->locked)) {
584 			spec_barrier();
585 
586 			if (yield_to_prev(lock, node, prev_cpu, paravirt))
587 				seen_preempted = true;
588 		}
589 		spec_barrier();
590 		spin_end();
591 
592 		smp_rmb(); /* acquire barrier for the mcs lock */
593 
594 		/*
595 		 * Generic qspinlocks have this prefetch here, but it seems
596 		 * like it could cause additional cache line transitions because
597 		 * the waiter will keep loading from it.
598 		 */
599 		if (_Q_SPIN_PREFETCH_NEXT) {
600 			next = READ_ONCE(node->next);
601 			if (next)
602 				prefetchw(next);
603 		}
604 	}
605 
606 	/* We're at the head of the waitqueue, wait for the lock. */
607 again:
608 	spin_begin();
609 	for (;;) {
610 		bool preempted;
611 
612 		val = READ_ONCE(lock->val);
613 		if (!(val & _Q_LOCKED_VAL))
614 			break;
615 		spec_barrier();
616 
617 		if (paravirt && pv_sleepy_lock && maybe_stealers) {
618 			if (!sleepy) {
619 				if (val & _Q_SLEEPY_VAL) {
620 					seen_sleepy_lock();
621 					sleepy = true;
622 				} else if (recently_sleepy()) {
623 					sleepy = true;
624 				}
625 			}
626 			if (pv_sleepy_lock_sticky && seen_preempted &&
627 			    !(val & _Q_SLEEPY_VAL)) {
628 				if (try_set_sleepy(lock, val))
629 					val |= _Q_SLEEPY_VAL;
630 			}
631 		}
632 
633 		propagate_sleepy(node, val, paravirt);
634 		preempted = yield_head_to_locked_owner(lock, val, paravirt);
635 		if (!maybe_stealers)
636 			continue;
637 
638 		if (preempted)
639 			seen_preempted = true;
640 
641 		if (paravirt && preempted) {
642 			sleepy = true;
643 
644 			if (!pv_spin_on_preempted_owner)
645 				iters++;
646 		} else {
647 			iters++;
648 		}
649 
650 		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
651 			mustq = true;
652 			set_mustq(lock);
653 			val |= _Q_MUST_Q_VAL;
654 		}
655 	}
656 	spec_barrier();
657 	spin_end();
658 
659 	/* If we're the last queued, must clean up the tail. */
660 	old = trylock_clean_tail(lock, tail);
661 	if (unlikely(old & _Q_LOCKED_VAL)) {
662 		BUG_ON(!maybe_stealers);
663 		goto again; /* Can only be true if maybe_stealers. */
664 	}
665 
666 	if ((old & _Q_TAIL_CPU_MASK) == tail)
667 		goto release; /* We were the tail, no next. */
668 
669 	/* There is a next, must wait for node->next != NULL (MCS protocol) */
670 	next = READ_ONCE(node->next);
671 	if (!next) {
672 		spin_begin();
673 		while (!(next = READ_ONCE(node->next)))
674 			cpu_relax();
675 		spin_end();
676 	}
677 	spec_barrier();
678 
679 	/*
680 	 * Unlock the next mcs waiter node. Release barrier is not required
681 	 * here because the acquirer is only accessing the lock word, and
682 	 * the acquire barrier we took the lock with orders that update vs
683 	 * this store to locked. The corresponding barrier is the smp_rmb()
684 	 * acquire barrier for mcs lock, above.
685 	 */
686 	if (paravirt && pv_prod_head) {
687 		int next_cpu = next->cpu;
688 		WRITE_ONCE(next->locked, 1);
689 		if (_Q_SPIN_MISO)
690 			asm volatile("miso" ::: "memory");
691 		if (vcpu_is_preempted(next_cpu))
692 			prod_cpu(next_cpu);
693 	} else {
694 		WRITE_ONCE(next->locked, 1);
695 		if (_Q_SPIN_MISO)
696 			asm volatile("miso" ::: "memory");
697 	}
698 
699 release:
700 	qnodesp->count--; /* release the node */
701 }
702 
703 void queued_spin_lock_slowpath(struct qspinlock *lock)
704 {
705 	/*
706 	 * This looks funny, but it induces the compiler to inline both
707 	 * sides of the branch rather than share code as when the condition
708 	 * is passed as the paravirt argument to the functions.
709 	 */
710 	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
711 		if (try_to_steal_lock(lock, true)) {
712 			spec_barrier();
713 			return;
714 		}
715 		queued_spin_lock_mcs_queue(lock, true);
716 	} else {
717 		if (try_to_steal_lock(lock, false)) {
718 			spec_barrier();
719 			return;
720 		}
721 		queued_spin_lock_mcs_queue(lock, false);
722 	}
723 }
724 EXPORT_SYMBOL(queued_spin_lock_slowpath);
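/*
 * Overall shape of the slow path, for reference: the inline fast path
 * (presumably a single trylock in asm/qspinlock.h) has already failed by
 * the time we get here. The slow path then optionally spins trying to
 * steal the lock without queueing, and otherwise joins the MCS-style
 * queue above, where only the queue head competes for the lock word. The
 * paravirt variants additionally yield to preempted lock owners and to
 * preempted previous waiters.
 */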
725 
726 #ifdef CONFIG_PARAVIRT_SPINLOCKS
727 void pv_spinlocks_init(void)
728 {
729 }
730 #endif
731 
732 #include <linux/debugfs.h>
733 static int steal_spins_set(void *data, u64 val)
734 {
735 #if _Q_SPIN_TRY_LOCK_STEAL == 1
736 	/* MAYBE_STEAL remains true */
737 	steal_spins = val;
738 #else
739 	static DEFINE_MUTEX(lock);
740 
741 	/*
742 	 * The lock slow path has a !maybe_stealers case in which the head of
743 	 * the queue assumes the lock cannot be taken by a stealer. That code
744 	 * path is unsafe in the presence of stealers, so we must keep the two
745 	 * from running at the same time.
746 	 */
747 
748 	mutex_lock(&lock);
749 	if (val && !steal_spins) {
750 		maybe_stealers = true;
751 		/* wait for queue head waiter to go away */
752 		synchronize_rcu();
753 		steal_spins = val;
754 	} else if (!val && steal_spins) {
755 		steal_spins = val;
756 		/* wait for all possible stealers to go away */
757 		synchronize_rcu();
758 		maybe_stealers = false;
759 	} else {
760 		steal_spins = val;
761 	}
762 	mutex_unlock(&lock);
763 #endif
764 
765 	return 0;
766 }
767 
768 static int steal_spins_get(void *data, u64 *val)
769 {
770 	*val = steal_spins;
771 
772 	return 0;
773 }
774 
775 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
776 
777 static int remote_steal_spins_set(void *data, u64 val)
778 {
779 	remote_steal_spins = val;
780 
781 	return 0;
782 }
783 
784 static int remote_steal_spins_get(void *data, u64 *val)
785 {
786 	*val = remote_steal_spins;
787 
788 	return 0;
789 }
790 
791 DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
792 
793 static int head_spins_set(void *data, u64 val)
794 {
795 	head_spins = val;
796 
797 	return 0;
798 }
799 
800 static int head_spins_get(void *data, u64 *val)
801 {
802 	*val = head_spins;
803 
804 	return 0;
805 }
806 
807 DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
808 
809 static int pv_yield_owner_set(void *data, u64 val)
810 {
811 	pv_yield_owner = !!val;
812 
813 	return 0;
814 }
815 
816 static int pv_yield_owner_get(void *data, u64 *val)
817 {
818 	*val = pv_yield_owner;
819 
820 	return 0;
821 }
822 
823 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
824 
825 static int pv_yield_allow_steal_set(void *data, u64 val)
826 {
827 	pv_yield_allow_steal = !!val;
828 
829 	return 0;
830 }
831 
832 static int pv_yield_allow_steal_get(void *data, u64 *val)
833 {
834 	*val = pv_yield_allow_steal;
835 
836 	return 0;
837 }
838 
839 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
840 
841 static int pv_spin_on_preempted_owner_set(void *data, u64 val)
842 {
843 	pv_spin_on_preempted_owner = !!val;
844 
845 	return 0;
846 }
847 
848 static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
849 {
850 	*val = pv_spin_on_preempted_owner;
851 
852 	return 0;
853 }
854 
855 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
856 
857 static int pv_sleepy_lock_set(void *data, u64 val)
858 {
859 	pv_sleepy_lock = !!val;
860 
861 	return 0;
862 }
863 
864 static int pv_sleepy_lock_get(void *data, u64 *val)
865 {
866 	*val = pv_sleepy_lock;
867 
868 	return 0;
869 }
870 
871 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
872 
873 static int pv_sleepy_lock_sticky_set(void *data, u64 val)
874 {
875 	pv_sleepy_lock_sticky = !!val;
876 
877 	return 0;
878 }
879 
880 static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
881 {
882 	*val = pv_sleepy_lock_sticky;
883 
884 	return 0;
885 }
886 
887 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
888 
889 static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
890 {
891 	pv_sleepy_lock_interval_ns = val;
892 
893 	return 0;
894 }
895 
896 static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
897 {
898 	*val = pv_sleepy_lock_interval_ns;
899 
900 	return 0;
901 }
902 
903 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
904 
905 static int pv_sleepy_lock_factor_set(void *data, u64 val)
906 {
907 	pv_sleepy_lock_factor = val;
908 
909 	return 0;
910 }
911 
912 static int pv_sleepy_lock_factor_get(void *data, u64 *val)
913 {
914 	*val = pv_sleepy_lock_factor;
915 
916 	return 0;
917 }
918 
919 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
920 
921 static int pv_yield_prev_set(void *data, u64 val)
922 {
923 	pv_yield_prev = !!val;
924 
925 	return 0;
926 }
927 
928 static int pv_yield_prev_get(void *data, u64 *val)
929 {
930 	*val = pv_yield_prev;
931 
932 	return 0;
933 }
934 
935 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
936 
937 static int pv_yield_sleepy_owner_set(void *data, u64 val)
938 {
939 	pv_yield_sleepy_owner = !!val;
940 
941 	return 0;
942 }
943 
944 static int pv_yield_sleepy_owner_get(void *data, u64 *val)
945 {
946 	*val = pv_yield_sleepy_owner;
947 
948 	return 0;
949 }
950 
951 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
952 
953 static int pv_prod_head_set(void *data, u64 val)
954 {
955 	pv_prod_head = !!val;
956 
957 	return 0;
958 }
959 
960 static int pv_prod_head_get(void *data, u64 *val)
961 {
962 	*val = pv_prod_head;
963 
964 	return 0;
965 }
966 
967 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
968 
969 static __init int spinlock_debugfs_init(void)
970 {
971 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
972 	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
973 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
974 	if (is_shared_processor()) {
975 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
976 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
977 		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
978 		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
979 		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
980 		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
981 		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
982 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
983 		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
984 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
985 	}
986 
987 	return 0;
988 }
989 device_initcall(spinlock_debugfs_init);
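/*
 * Example of runtime tuning through the files created above, assuming
 * debugfs is mounted at the usual /sys/kernel/debug and arch_debugfs_dir
 * is the powerpc directory there:
 *
 *	# cat /sys/kernel/debug/powerpc/qspl_steal_spins
 *	32
 *	# echo 0 > /sys/kernel/debug/powerpc/qspl_steal_spins
 *
 * Writing 0 disables lock stealing. When _Q_SPIN_TRY_LOCK_STEAL is not set,
 * steal_spins_set() also flips maybe_stealers with an RCU grace period in
 * between, so the change is safe against CPUs already in the slow path.
 */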
990