// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
#include <trace/events/lock.h>

#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	u8		sleepy; /* 1 if the previous vCPU was preempted or
				 * if the previous node was sleepy */
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;
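/*
 * All of the tunables above can be adjusted at runtime through the qspl_*
 * debugfs files created by spinlock_debugfs_init() at the bottom of this
 * file (typically under /sys/kernel/debug/powerpc/ when debugfs is mounted),
 * e.g. "echo 64 > /sys/kernel/debug/powerpc/qspl_steal_spins". The pv_*
 * tunables only take effect when CONFIG_PARAVIRT_SPINLOCKS is enabled and
 * the kernel runs as a shared-processor guest.
 */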

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif
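/*
 * "ori 31,31,0" is a no-op encoding used here as a speculation barrier hint;
 * it is only emitted when the qspinlock code is built with
 * _Q_SPIN_SPEC_BARRIER enabled, otherwise spec_barrier() expands to nothing.
 */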

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}
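/*
 * The +1/-1 in the tail encoding keeps the encoded tail non-zero for any
 * queued CPU (e.g. encode_tail_cpu(0) == 1 << _Q_TAIL_CPU_OFFSET), so a zero
 * tail field in the lock word unambiguously means "no queue" even when CPU 0
 * is the one that is queued.
 */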

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail		\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5					\n"
"	bne	3f						\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6					\n"
"	cmpw	0,%1,%3						\n"
	/* Merge the new locked value */
"	or	%1,%1,%4					\n"
"	bne	2f						\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6					\n"
"2:	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t"	PPC_ACQUIRE_BARRIER "					\n"
"3:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing the previous tail. Return the previous value.
 *
 * This provides a release barrier for publishing node; it pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t"	PPC_RELEASE_BARRIER "					\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu		\n"
"	andc	%1,%0,%4					\n"
"	or	%1,%1,%3					\n"
"	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq			\n"
"	or	%0,%0,%2					\n"
"	stwcx.	%0,0,%1						\n"
"	bne-	1b						\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq			\n"
"	andc	%0,%0,%2					\n"
"	stwcx.	%0,0,%1						\n"
"	bne-	1b						\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
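/*
 * _Q_MUST_Q_VAL is set by the queue head once it has spun for longer than
 * get_head_spins() allows; try_to_steal_lock() observes the bit and stops
 * stealing, which guarantees the queue head eventually gets the lock. When
 * pv_yield_allow_steal is set, the head temporarily clears the bit with
 * clear_mustq() while it yields to a preempted owner, re-allowing stealing
 * for the duration of the yield (see yield_head_to_locked_owner()).
 */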

static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy		\n"
"	cmpw	0,%0,%2						\n"
"	bne-	2f						\n"
"	stwcx.	%3,0,%1						\n"
"	bne-	1b						\n"
"2:								\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(void)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		/* Don't set sleepy because we likely have a stale val */
	}
}
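/*
 * The seen_sleepy_*() helpers above record that vCPU preemption was observed,
 * either by setting _Q_SLEEPY_VAL in the lock word or by timestamping this
 * CPU in sleepy_lock_seen_clock. Later spinners that see either indication
 * treat the lock as "sleepy" and scale their spin budgets by
 * pv_sleepy_lock_factor in get_steal_spins()/get_remote_steal_spins()/
 * get_head_spins(); with the defaults that is 32 * 256 = 8192 steal spins.
 */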

static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

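/*
 * Pass the sleepy hint down the queue: if the lock owner is currently
 * preempted, mark the next queued waiter as sleepy so that it checks the
 * lock owner itself and yields to it if necessary (see yield_to_prev()).
 */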
static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_sleepy_owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (next->sleepy)
		return;

	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->sleepy = 1;
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
	u32 yield_count;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_sleepy_owner)
		goto yield_prev;

	/*
	 * If the previous waiter was preempted it might not be able to
	 * propagate sleepy to us, so check the lock in that case too.
	 */
	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
		u32 val = READ_ONCE(lock->val);

		if (val & _Q_LOCKED_VAL) {
			if (node->next && !node->next->sleepy) {
				/*
				 * Propagate sleepy to next waiter. Only if
				 * owner is preempted, which allows the queue
				 * to become "non-sleepy" if vCPU preemption
				 * ceases to occur, even if the lock remains
				 * highly contended.
				 */
				if (vcpu_is_preempted(get_owner_cpu(val)))
					node->next->sleepy = 1;
			}

			preempted = yield_to_locked_owner(lock, val, paravirt);
			if (preempted)
				return preempted;
		}
		node->sleepy = false;
	}

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node();

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}
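/*
 * Note the default remote-steal budget (1 << 2) is much smaller than the
 * local one (1 << 5), so steal_break() gives up stealing quickly when the
 * current owner runs on a different NUMA node and the caller falls back to
 * queueing instead.
 */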

static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * With pv_spin_on_preempted_owner, don't increase
			 * iters while the owner is preempted -- we won't
			 * interfere with it by definition. This could
			 * introduce some latency if we continually observe
			 * preempted owners, but hopefully that's a rare
			 * corner case of a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}
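	/*
	 * The trylock fallback above handles the (unlikely) case of more than
	 * MAX_NODES nested slowpath acquisitions on this CPU, which generally
	 * only arises when interrupt contexts (soft IRQ, hard IRQ, NMI-like)
	 * re-enter the slowpath while it is already active.
	 */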

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->sleepy = 0;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		int prev_cpu = decode_tail_cpu(old);
		struct qnode *prev = get_tail_qnode(lock, prev_cpu);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, prev_cpu, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_sleepy(node, val, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	/*
	 * Clear the lock before releasing the node, as another CPU might see stale
	 * values if an interrupt occurs after we increment qnodesp->count
	 * but before node->lock is initialized. The barrier ensures that
	 * there are no further stores to the node after it has been released.
	 */
	node->lock = NULL;
	barrier();
	qnodesp->count--;
}

void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
	trace_contention_begin(lock, LCB_F_SPIN);
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, false);
	}
	trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* maybe_stealers remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case in which the head of
	 * the queue can assume no stealer will take the lock out from under
	 * it. That assumption is unsafe in the presence of stealers, so the
	 * two modes must be kept away from one another.
	 */
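	/*
	 * synchronize_rcu() works as the "wait for them to go away" step
	 * below because the lock slowpath runs with preemption disabled, so
	 * a grace period waits out any CPU currently spinning in
	 * try_to_steal_lock() or in the !maybe_stealers head-of-queue path.
	 */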

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
	pv_yield_sleepy_owner = !!val;

	return 0;
}

static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
	*val = pv_yield_sleepy_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);