// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
#include <trace/events/lock.h>

#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	u8		sleepy; /* 1 if the previous vCPU was preempted or
				 * if the previous node was sleepy */
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}
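
/*
 * A rough sketch of how the helpers above compose; the field offsets and
 * masks come from asm/qspinlock_types.h, so treat the exact layout as an
 * assumption of this example rather than something defined here. The tail
 * encodes "cpu + 1" so that a tail field of 0 means "no queued waiter":
 *
 *	u32 tail = encode_tail_cpu(smp_processor_id());
 *
 *	WARN_ON(decode_tail_cpu(tail) != smp_processor_id());
 *	WARN_ON(decode_tail_cpu(0) != -1);	// empty tail decodes to -1
 */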

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5						\n"
"	bne	3f							\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6						\n"
"	cmpw	0,%1,%3							\n"
	/* Merge the new locked value */
"	or	%1,%1,%4						\n"
"	bne	2f							\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6						\n"
"2:	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER "						\n"
"3:									\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing the node; this pairs with
 * the acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t"	PPC_RELEASE_BARRIER "						\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
"	andc	%1,%0,%4						\n"
"	or	%1,%1,%3						\n"
"	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq				\n"
"	or	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq				\n"
"	andc	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
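
/*
 * For reference, each of the lwarx/stwcx. helpers above is a
 * load-reserve/store-conditional read-modify-write on lock->val. A rough C
 * equivalent of set_mustq(), sketched with the generic atomic API rather
 * than the hand-written asm actually used here, would be:
 *
 *	u32 old, new;
 *
 *	do {
 *		old = READ_ONCE(lock->val);
 *		new = old | _Q_MUST_Q_VAL;
 *	} while (cmpxchg_relaxed(&lock->val, old, new) != old);
 *
 * The hand-written asm performs the same update as a single larx/stcx. loop
 * without the compare, and trylock_clean_tail() additionally applies the EH
 * hint and the acquire barrier on the lock-taken path.
 */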

static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
"	cmpw	0,%0,%2							\n"
"	bne-	2f							\n"
"	stwcx.	%3,0,%1							\n"
"	bne-	1b							\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(void)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		/* Don't set sleepy because we likely have a stale val */
	}
}

static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}
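
/*
 * A note on the yield heuristic above, stated as an assumption about the
 * shared-processor yield-count convention rather than anything defined in
 * this file: yield_count_of() samples a counter that the hypervisor bumps
 * on every preempt/dispatch of the target vCPU, so an odd value means
 * "currently preempted". Passing that sampled count to yield_to_preempted()
 * lets the hypervisor ignore the directed yield if the owner has already
 * been re-dispatched in the meantime, which is why the lock word is only
 * re-checked after smp_rmb() orders it against the count sample.
 */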

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_sleepy_owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (next->sleepy)
		return;

	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->sleepy = 1;
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
	u32 yield_count;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_sleepy_owner)
		goto yield_prev;

	/*
	 * If the previous waiter was preempted it might not be able to
	 * propagate sleepy to us, so check the lock in that case too.
	 */
	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
		u32 val = READ_ONCE(lock->val);

		if (val & _Q_LOCKED_VAL) {
			if (node->next && !node->next->sleepy) {
				/*
				 * Propagate sleepy to next waiter. Only if
				 * owner is preempted, which allows the queue
				 * to become "non-sleepy" if vCPU preemption
				 * ceases to occur, even if the lock remains
				 * highly contended.
				 */
				if (vcpu_is_preempted(get_owner_cpu(val)))
					node->next->sleepy = 1;
			}

			preempted = yield_to_locked_owner(lock, val, paravirt);
			if (preempted)
				return preempted;
		}
		node->sleepy = false;
	}

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node();

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}
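
/*
 * Rough worked example of the budgets steal_break() enforces, using the
 * default tunables above (these are runtime-adjustable via debugfs, so
 * treat the numbers as illustrative only):
 *
 *	steal_spins		= 1 << 5 = 32 spins before giving up stealing
 *	remote_steal_spins	= 1 << 2 = 4 spins if the owner sits on a
 *				  remote NUMA node
 *	pv_sleepy_lock_factor	= 256, so a "sleepy" lock under vCPU
 *				  preemption allows 32 * 256 = 8192 spins
 */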

static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * With pv_spin_on_preempted_owner, don't increase
			 * iters while the owner is preempted -- we won't
			 * interfere with it by definition. This could
			 * introduce some latency issue if we continually
			 * observe preempted owners, but hopefully that's a
			 * rare corner case of a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->sleepy = 0;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node, link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		int prev_cpu = decode_tail_cpu(old);
		struct qnode *prev = get_tail_qnode(lock, prev_cpu);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, prev_cpu, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_sleepy(node, val, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	/*
	 * Clear the lock before releasing the node, as another CPU might see stale
	 * values if an interrupt occurs after we increment qnodesp->count
	 * but before node->lock is initialized. The barrier ensures that
	 * there are no further stores to the node after it has been released.
	 */
	node->lock = NULL;
	barrier();
	qnodesp->count--;
}
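
/*
 * A condensed sketch of the slow-path protocol implemented above, for
 * orientation only (the authoritative ordering rules are the comments on
 * the individual barriers):
 *
 *	1. publish_tail_cpu()	- release: make our qnode visible via the tail
 *	2. prev->next = node	- link behind the previous waiter
 *	3. spin on node->locked	- yield to prev/owner vCPUs if preempted
 *	4. spin on lock word	- queue head waits for unlock, setting
 *				  MUST_Q once it runs out of patience
 *	5. trylock_clean_tail()	- acquire the lock, clearing the tail if we
 *				  were the last waiter
 *	6. next->locked = 1	- hand the queue head role to the next
 *				  waiter (optionally prodding its vCPU)
 */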

void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
	trace_contention_begin(lock, LCB_F_SPIN);
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, false);
	}
	trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
	pv_yield_sleepy_owner = !!val;

	return 0;
}

static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
	*val = pv_yield_sleepy_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); 991 debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); 992 debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); 993 debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); 994 } 995 996 return 0; 997 } 998 device_initcall(spinlock_debugfs_init); 999