1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Machine independent bits of reader/writer lock implementation. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ddb.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 44 #include <sys/kernel.h> 45 #include <sys/kdb.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/proc.h> 50 #include <sys/rmlock.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/turnstile.h> 54 #include <sys/lock_profile.h> 55 #include <machine/cpu.h> 56 #include <vm/uma.h> 57 58 #ifdef DDB 59 #include <ddb/ddb.h> 60 #endif 61 62 /* 63 * A cookie to mark destroyed rmlocks. This is stored in the head of 64 * rm_activeReaders. 65 */ 66 #define RM_DESTROYED ((void *)0xdead) 67 68 #define rm_destroyed(rm) \ 69 (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED) 70 71 #define RMPF_ONQUEUE 1 72 #define RMPF_SIGNAL 2 73 74 #ifndef INVARIANTS 75 #define _rm_assert(c, what, file, line) 76 #endif 77 78 static void assert_rm(const struct lock_object *lock, int what); 79 #ifdef DDB 80 static void db_show_rm(const struct lock_object *lock); 81 #endif 82 static void lock_rm(struct lock_object *lock, uintptr_t how); 83 #ifdef KDTRACE_HOOKS 84 static int owner_rm(const struct lock_object *lock, struct thread **owner); 85 #endif 86 static uintptr_t unlock_rm(struct lock_object *lock); 87 88 struct lock_class lock_class_rm = { 89 .lc_name = "rm", 90 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, 91 .lc_assert = assert_rm, 92 #ifdef DDB 93 .lc_ddb_show = db_show_rm, 94 #endif 95 .lc_lock = lock_rm, 96 .lc_unlock = unlock_rm, 97 #ifdef KDTRACE_HOOKS 98 .lc_owner = owner_rm, 99 #endif 100 }; 101 102 struct lock_class lock_class_rm_sleepable = { 103 .lc_name = "sleepable rm", 104 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE, 105 .lc_assert = assert_rm, 106 #ifdef DDB 107 .lc_ddb_show = db_show_rm, 108 #endif 109 .lc_lock = lock_rm, 110 .lc_unlock = unlock_rm, 111 #ifdef KDTRACE_HOOKS 112 .lc_owner = owner_rm, 113 #endif 114 }; 115 116 static void 117 assert_rm(const struct lock_object *lock, int what) 118 { 119 120 rm_assert((const struct rmlock *)lock, what); 121 } 122 123 static void 124 lock_rm(struct lock_object *lock, uintptr_t how) 125 { 126 struct rmlock *rm; 127 struct rm_priotracker *tracker; 128 129 rm = (struct rmlock *)lock; 130 if (how == 0) 131 rm_wlock(rm); 132 else { 133 tracker = (struct rm_priotracker *)how; 134 rm_rlock(rm, tracker); 135 } 136 } 137 138 static uintptr_t 139 unlock_rm(struct lock_object *lock) 140 { 141 struct thread *td; 142 struct pcpu *pc; 143 struct rmlock *rm; 144 struct rm_queue *queue; 145 struct rm_priotracker *tracker; 146 uintptr_t how; 147 148 rm = (struct rmlock *)lock; 149 tracker = NULL; 150 how = 0; 151 rm_assert(rm, RA_LOCKED | RA_NOTRECURSED); 152 if (rm_wowned(rm)) 153 rm_wunlock(rm); 154 else { 155 /* 156 * Find the right rm_priotracker structure for curthread. 157 * The guarantee about its uniqueness is given by the fact 158 * we already asserted the lock wasn't recursively acquired. 159 */ 160 critical_enter(); 161 td = curthread; 162 pc = get_pcpu(); 163 for (queue = pc->pc_rm_queue.rmq_next; 164 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 165 tracker = (struct rm_priotracker *)queue; 166 if ((tracker->rmp_rmlock == rm) && 167 (tracker->rmp_thread == td)) { 168 how = (uintptr_t)tracker; 169 break; 170 } 171 } 172 KASSERT(tracker != NULL, 173 ("rm_priotracker is non-NULL when lock held in read mode")); 174 critical_exit(); 175 rm_runlock(rm, tracker); 176 } 177 return (how); 178 } 179 180 #ifdef KDTRACE_HOOKS 181 static int 182 owner_rm(const struct lock_object *lock, struct thread **owner) 183 { 184 const struct rmlock *rm; 185 struct lock_class *lc; 186 187 rm = (const struct rmlock *)lock; 188 lc = LOCK_CLASS(&rm->rm_wlock_object); 189 return (lc->lc_owner(&rm->rm_wlock_object, owner)); 190 } 191 #endif 192 193 static struct mtx rm_spinlock; 194 195 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); 196 197 /* 198 * Add or remove tracker from per-cpu list. 199 * 200 * The per-cpu list can be traversed at any time in forward direction from an 201 * interrupt on the *local* cpu. 202 */ 203 static void inline 204 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker) 205 { 206 struct rm_queue *next; 207 208 /* Initialize all tracker pointers */ 209 tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; 210 next = pc->pc_rm_queue.rmq_next; 211 tracker->rmp_cpuQueue.rmq_next = next; 212 213 /* rmq_prev is not used during froward traversal. */ 214 next->rmq_prev = &tracker->rmp_cpuQueue; 215 216 /* Update pointer to first element. */ 217 pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; 218 } 219 220 /* 221 * Return a count of the number of trackers the thread 'td' already 222 * has on this CPU for the lock 'rm'. 223 */ 224 static int 225 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm, 226 const struct thread *td) 227 { 228 struct rm_queue *queue; 229 struct rm_priotracker *tracker; 230 int count; 231 232 count = 0; 233 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 234 queue = queue->rmq_next) { 235 tracker = (struct rm_priotracker *)queue; 236 if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td)) 237 count++; 238 } 239 return (count); 240 } 241 242 static void inline 243 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker) 244 { 245 struct rm_queue *next, *prev; 246 247 next = tracker->rmp_cpuQueue.rmq_next; 248 prev = tracker->rmp_cpuQueue.rmq_prev; 249 250 /* Not used during forward traversal. */ 251 next->rmq_prev = prev; 252 253 /* Remove from list. */ 254 prev->rmq_next = next; 255 } 256 257 static void 258 rm_cleanIPI(void *arg) 259 { 260 struct pcpu *pc; 261 struct rmlock *rm = arg; 262 struct rm_priotracker *tracker; 263 struct rm_queue *queue; 264 pc = get_pcpu(); 265 266 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 267 queue = queue->rmq_next) { 268 tracker = (struct rm_priotracker *)queue; 269 if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) { 270 tracker->rmp_flags = RMPF_ONQUEUE; 271 mtx_lock_spin(&rm_spinlock); 272 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 273 rmp_qentry); 274 mtx_unlock_spin(&rm_spinlock); 275 } 276 } 277 } 278 279 void 280 rm_init_flags(struct rmlock *rm, const char *name, int opts) 281 { 282 struct lock_class *lc; 283 int liflags, xflags; 284 285 liflags = 0; 286 if (!(opts & RM_NOWITNESS)) 287 liflags |= LO_WITNESS; 288 if (opts & RM_RECURSE) 289 liflags |= LO_RECURSABLE; 290 if (opts & RM_NEW) 291 liflags |= LO_NEW; 292 if (opts & RM_DUPOK) 293 liflags |= LO_DUPOK; 294 rm->rm_writecpus = all_cpus; 295 LIST_INIT(&rm->rm_activeReaders); 296 if (opts & RM_SLEEPABLE) { 297 liflags |= LO_SLEEPABLE; 298 lc = &lock_class_rm_sleepable; 299 xflags = (opts & RM_NEW ? SX_NEW : 0); 300 sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", 301 xflags | SX_NOWITNESS); 302 } else { 303 lc = &lock_class_rm; 304 xflags = (opts & RM_NEW ? MTX_NEW : 0); 305 mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", 306 xflags | MTX_NOWITNESS); 307 } 308 lock_init(&rm->lock_object, lc, name, NULL, liflags); 309 } 310 311 void 312 rm_init(struct rmlock *rm, const char *name) 313 { 314 315 rm_init_flags(rm, name, 0); 316 } 317 318 void 319 rm_destroy(struct rmlock *rm) 320 { 321 322 rm_assert(rm, RA_UNLOCKED); 323 LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED; 324 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 325 sx_destroy(&rm->rm_lock_sx); 326 else 327 mtx_destroy(&rm->rm_lock_mtx); 328 lock_destroy(&rm->lock_object); 329 } 330 331 int 332 rm_wowned(const struct rmlock *rm) 333 { 334 335 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 336 return (sx_xlocked(&rm->rm_lock_sx)); 337 else 338 return (mtx_owned(&rm->rm_lock_mtx)); 339 } 340 341 void 342 rm_sysinit(void *arg) 343 { 344 struct rm_args *args; 345 346 args = arg; 347 rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); 348 } 349 350 static __noinline int 351 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 352 { 353 struct pcpu *pc; 354 355 critical_enter(); 356 pc = get_pcpu(); 357 358 /* Check if we just need to do a proper critical_exit. */ 359 if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) { 360 critical_exit(); 361 return (1); 362 } 363 364 /* Remove our tracker from the per-cpu list. */ 365 rm_tracker_remove(pc, tracker); 366 367 /* 368 * Check to see if the IPI granted us the lock after all. The load of 369 * rmp_flags must happen after the tracker is removed from the list. 370 */ 371 atomic_interrupt_fence(); 372 if (tracker->rmp_flags) { 373 /* Just add back tracker - we hold the lock. */ 374 rm_tracker_add(pc, tracker); 375 critical_exit(); 376 return (1); 377 } 378 379 /* 380 * We allow readers to acquire a lock even if a writer is blocked if 381 * the lock is recursive and the reader already holds the lock. 382 */ 383 if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { 384 /* 385 * Just grant the lock if this thread already has a tracker 386 * for this lock on the per-cpu queue. 387 */ 388 if (rm_trackers_present(pc, rm, curthread) != 0) { 389 mtx_lock_spin(&rm_spinlock); 390 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 391 rmp_qentry); 392 tracker->rmp_flags = RMPF_ONQUEUE; 393 mtx_unlock_spin(&rm_spinlock); 394 rm_tracker_add(pc, tracker); 395 critical_exit(); 396 return (1); 397 } 398 } 399 400 sched_unpin(); 401 critical_exit(); 402 403 if (trylock) { 404 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 405 if (!sx_try_xlock(&rm->rm_lock_sx)) 406 return (0); 407 } else { 408 if (!mtx_trylock(&rm->rm_lock_mtx)) 409 return (0); 410 } 411 } else { 412 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 413 THREAD_SLEEPING_OK(); 414 sx_xlock(&rm->rm_lock_sx); 415 THREAD_NO_SLEEPING(); 416 } else 417 mtx_lock(&rm->rm_lock_mtx); 418 } 419 420 critical_enter(); 421 pc = get_pcpu(); 422 CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus); 423 rm_tracker_add(pc, tracker); 424 sched_pin(); 425 critical_exit(); 426 427 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 428 sx_xunlock(&rm->rm_lock_sx); 429 else 430 mtx_unlock(&rm->rm_lock_mtx); 431 432 return (1); 433 } 434 435 int 436 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 437 { 438 struct thread *td = curthread; 439 struct pcpu *pc; 440 441 if (SCHEDULER_STOPPED()) 442 return (1); 443 444 tracker->rmp_flags = 0; 445 tracker->rmp_thread = td; 446 tracker->rmp_rmlock = rm; 447 448 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 449 THREAD_NO_SLEEPING(); 450 451 td->td_critnest++; /* critical_enter(); */ 452 453 atomic_interrupt_fence(); 454 455 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 456 457 rm_tracker_add(pc, tracker); 458 459 sched_pin(); 460 461 atomic_interrupt_fence(); 462 463 td->td_critnest--; 464 465 /* 466 * Fast path to combine two common conditions into a single 467 * conditional jump. 468 */ 469 if (__predict_true(0 == (td->td_owepreempt | 470 CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))) 471 return (1); 472 473 /* We do not have a read token and need to acquire one. */ 474 return _rm_rlock_hard(rm, tracker, trylock); 475 } 476 477 static __noinline void 478 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker) 479 { 480 481 if (td->td_owepreempt) { 482 td->td_critnest++; 483 critical_exit(); 484 } 485 486 if (!tracker->rmp_flags) 487 return; 488 489 mtx_lock_spin(&rm_spinlock); 490 LIST_REMOVE(tracker, rmp_qentry); 491 492 if (tracker->rmp_flags & RMPF_SIGNAL) { 493 struct rmlock *rm; 494 struct turnstile *ts; 495 496 rm = tracker->rmp_rmlock; 497 498 turnstile_chain_lock(&rm->lock_object); 499 mtx_unlock_spin(&rm_spinlock); 500 501 ts = turnstile_lookup(&rm->lock_object); 502 503 turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); 504 turnstile_unpend(ts); 505 turnstile_chain_unlock(&rm->lock_object); 506 } else 507 mtx_unlock_spin(&rm_spinlock); 508 } 509 510 void 511 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) 512 { 513 struct pcpu *pc; 514 struct thread *td = tracker->rmp_thread; 515 516 if (SCHEDULER_STOPPED()) 517 return; 518 519 td->td_critnest++; /* critical_enter(); */ 520 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 521 rm_tracker_remove(pc, tracker); 522 td->td_critnest--; 523 sched_unpin(); 524 525 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 526 THREAD_SLEEPING_OK(); 527 528 if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags))) 529 return; 530 531 _rm_unlock_hard(td, tracker); 532 } 533 534 void 535 _rm_wlock(struct rmlock *rm) 536 { 537 struct rm_priotracker *prio; 538 struct turnstile *ts; 539 cpuset_t readcpus; 540 541 if (SCHEDULER_STOPPED()) 542 return; 543 544 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 545 sx_xlock(&rm->rm_lock_sx); 546 else 547 mtx_lock(&rm->rm_lock_mtx); 548 549 if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { 550 /* Get all read tokens back */ 551 readcpus = all_cpus; 552 CPU_ANDNOT(&readcpus, &readcpus, &rm->rm_writecpus); 553 rm->rm_writecpus = all_cpus; 554 555 /* 556 * Assumes rm->rm_writecpus update is visible on other CPUs 557 * before rm_cleanIPI is called. 558 */ 559 #ifdef SMP 560 smp_rendezvous_cpus(readcpus, 561 smp_no_rendezvous_barrier, 562 rm_cleanIPI, 563 smp_no_rendezvous_barrier, 564 rm); 565 566 #else 567 rm_cleanIPI(rm); 568 #endif 569 570 mtx_lock_spin(&rm_spinlock); 571 while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { 572 ts = turnstile_trywait(&rm->lock_object); 573 prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; 574 mtx_unlock_spin(&rm_spinlock); 575 turnstile_wait(ts, prio->rmp_thread, 576 TS_EXCLUSIVE_QUEUE); 577 mtx_lock_spin(&rm_spinlock); 578 } 579 mtx_unlock_spin(&rm_spinlock); 580 } 581 } 582 583 void 584 _rm_wunlock(struct rmlock *rm) 585 { 586 587 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 588 sx_xunlock(&rm->rm_lock_sx); 589 else 590 mtx_unlock(&rm->rm_lock_mtx); 591 } 592 593 #if LOCK_DEBUG > 0 594 595 void 596 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 597 { 598 599 if (SCHEDULER_STOPPED()) 600 return; 601 602 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 603 ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d", 604 curthread, rm->lock_object.lo_name, file, line)); 605 KASSERT(!rm_destroyed(rm), 606 ("rm_wlock() of destroyed rmlock @ %s:%d", file, line)); 607 _rm_assert(rm, RA_UNLOCKED, file, line); 608 609 WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, 610 file, line, NULL); 611 612 _rm_wlock(rm); 613 614 LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); 615 WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 616 TD_LOCKS_INC(curthread); 617 } 618 619 void 620 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 621 { 622 623 if (SCHEDULER_STOPPED()) 624 return; 625 626 KASSERT(!rm_destroyed(rm), 627 ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line)); 628 _rm_assert(rm, RA_WLOCKED, file, line); 629 WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 630 LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); 631 _rm_wunlock(rm); 632 TD_LOCKS_DEC(curthread); 633 } 634 635 int 636 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 637 int trylock, const char *file, int line) 638 { 639 640 if (SCHEDULER_STOPPED()) 641 return (1); 642 643 #ifdef INVARIANTS 644 if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) { 645 critical_enter(); 646 KASSERT(rm_trackers_present(get_pcpu(), rm, 647 curthread) == 0, 648 ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n", 649 rm->lock_object.lo_name, file, line)); 650 critical_exit(); 651 } 652 #endif 653 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 654 ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d", 655 curthread, rm->lock_object.lo_name, file, line)); 656 KASSERT(!rm_destroyed(rm), 657 ("rm_rlock() of destroyed rmlock @ %s:%d", file, line)); 658 if (!trylock) { 659 KASSERT(!rm_wowned(rm), 660 ("rm_rlock: wlock already held for %s @ %s:%d", 661 rm->lock_object.lo_name, file, line)); 662 WITNESS_CHECKORDER(&rm->lock_object, 663 LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL); 664 } 665 666 if (_rm_rlock(rm, tracker, trylock)) { 667 if (trylock) 668 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file, 669 line); 670 else 671 LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, 672 line); 673 WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line); 674 TD_LOCKS_INC(curthread); 675 return (1); 676 } else if (trylock) 677 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line); 678 679 return (0); 680 } 681 682 void 683 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 684 const char *file, int line) 685 { 686 687 if (SCHEDULER_STOPPED()) 688 return; 689 690 KASSERT(!rm_destroyed(rm), 691 ("rm_runlock() of destroyed rmlock @ %s:%d", file, line)); 692 _rm_assert(rm, RA_RLOCKED, file, line); 693 WITNESS_UNLOCK(&rm->lock_object, 0, file, line); 694 LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); 695 _rm_runlock(rm, tracker); 696 TD_LOCKS_DEC(curthread); 697 } 698 699 #else 700 701 /* 702 * Just strip out file and line arguments if no lock debugging is enabled in 703 * the kernel - we are called from a kernel module. 704 */ 705 void 706 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 707 { 708 709 _rm_wlock(rm); 710 } 711 712 void 713 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 714 { 715 716 _rm_wunlock(rm); 717 } 718 719 int 720 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 721 int trylock, const char *file, int line) 722 { 723 724 return _rm_rlock(rm, tracker, trylock); 725 } 726 727 void 728 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 729 const char *file, int line) 730 { 731 732 _rm_runlock(rm, tracker); 733 } 734 735 #endif 736 737 #ifdef INVARIANT_SUPPORT 738 #ifndef INVARIANTS 739 #undef _rm_assert 740 #endif 741 742 /* 743 * Note that this does not need to use witness_assert() for read lock 744 * assertions since an exact count of read locks held by this thread 745 * is computable. 746 */ 747 void 748 _rm_assert(const struct rmlock *rm, int what, const char *file, int line) 749 { 750 int count; 751 752 if (SCHEDULER_STOPPED()) 753 return; 754 switch (what) { 755 case RA_LOCKED: 756 case RA_LOCKED | RA_RECURSED: 757 case RA_LOCKED | RA_NOTRECURSED: 758 case RA_RLOCKED: 759 case RA_RLOCKED | RA_RECURSED: 760 case RA_RLOCKED | RA_NOTRECURSED: 761 /* 762 * Handle the write-locked case. Unlike other 763 * primitives, writers can never recurse. 764 */ 765 if (rm_wowned(rm)) { 766 if (what & RA_RLOCKED) 767 panic("Lock %s exclusively locked @ %s:%d\n", 768 rm->lock_object.lo_name, file, line); 769 if (what & RA_RECURSED) 770 panic("Lock %s not recursed @ %s:%d\n", 771 rm->lock_object.lo_name, file, line); 772 break; 773 } 774 775 critical_enter(); 776 count = rm_trackers_present(get_pcpu(), rm, curthread); 777 critical_exit(); 778 779 if (count == 0) 780 panic("Lock %s not %slocked @ %s:%d\n", 781 rm->lock_object.lo_name, (what & RA_RLOCKED) ? 782 "read " : "", file, line); 783 if (count > 1) { 784 if (what & RA_NOTRECURSED) 785 panic("Lock %s recursed @ %s:%d\n", 786 rm->lock_object.lo_name, file, line); 787 } else if (what & RA_RECURSED) 788 panic("Lock %s not recursed @ %s:%d\n", 789 rm->lock_object.lo_name, file, line); 790 break; 791 case RA_WLOCKED: 792 if (!rm_wowned(rm)) 793 panic("Lock %s not exclusively locked @ %s:%d\n", 794 rm->lock_object.lo_name, file, line); 795 break; 796 case RA_UNLOCKED: 797 if (rm_wowned(rm)) 798 panic("Lock %s exclusively locked @ %s:%d\n", 799 rm->lock_object.lo_name, file, line); 800 801 critical_enter(); 802 count = rm_trackers_present(get_pcpu(), rm, curthread); 803 critical_exit(); 804 805 if (count != 0) 806 panic("Lock %s read locked @ %s:%d\n", 807 rm->lock_object.lo_name, file, line); 808 break; 809 default: 810 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 811 line); 812 } 813 } 814 #endif /* INVARIANT_SUPPORT */ 815 816 #ifdef DDB 817 static void 818 print_tracker(struct rm_priotracker *tr) 819 { 820 struct thread *td; 821 822 td = tr->rmp_thread; 823 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 824 td->td_proc->p_pid, td->td_name); 825 if (tr->rmp_flags & RMPF_ONQUEUE) { 826 db_printf("ONQUEUE"); 827 if (tr->rmp_flags & RMPF_SIGNAL) 828 db_printf(",SIGNAL"); 829 } else 830 db_printf("0"); 831 db_printf("}\n"); 832 } 833 834 static void 835 db_show_rm(const struct lock_object *lock) 836 { 837 struct rm_priotracker *tr; 838 struct rm_queue *queue; 839 const struct rmlock *rm; 840 struct lock_class *lc; 841 struct pcpu *pc; 842 843 rm = (const struct rmlock *)lock; 844 db_printf(" writecpus: "); 845 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 846 db_printf("\n"); 847 db_printf(" per-CPU readers:\n"); 848 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 849 for (queue = pc->pc_rm_queue.rmq_next; 850 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 851 tr = (struct rm_priotracker *)queue; 852 if (tr->rmp_rmlock == rm) 853 print_tracker(tr); 854 } 855 db_printf(" active readers:\n"); 856 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 857 print_tracker(tr); 858 lc = LOCK_CLASS(&rm->rm_wlock_object); 859 db_printf("Backing write-lock (%s):\n", lc->lc_name); 860 lc->lc_ddb_show(&rm->rm_wlock_object); 861 } 862 #endif 863 864 /* 865 * Read-mostly sleepable locks. 866 * 867 * These primitives allow both readers and writers to sleep. However, neither 868 * readers nor writers are tracked and subsequently there is no priority 869 * propagation. 870 * 871 * They are intended to be only used when write-locking is almost never needed 872 * (e.g., they can guard against unloading a kernel module) while read-locking 873 * happens all the time. 874 * 875 * Concurrent writers take turns taking the lock while going off cpu. If this is 876 * of concern for your usecase, this is not the right primitive. 877 * 878 * Neither rms_rlock nor rms_runlock use thread fences. Instead interrupt 879 * fences are inserted to ensure ordering with the code executed in the IPI 880 * handler. 881 * 882 * No attempt is made to track which CPUs read locked at least once, 883 * consequently write locking sends IPIs to all of them. This will become a 884 * problem at some point. The easiest way to lessen it is to provide a bitmap. 885 */ 886 887 #define RMS_NOOWNER ((void *)0x1) 888 #define RMS_TRANSIENT ((void *)0x2) 889 #define RMS_FLAGMASK 0xf 890 891 struct rmslock_pcpu { 892 int influx; 893 int readers; 894 }; 895 896 _Static_assert(sizeof(struct rmslock_pcpu) == 8, "bad size"); 897 898 /* 899 * Internal routines 900 */ 901 static struct rmslock_pcpu * 902 rms_int_pcpu(struct rmslock *rms) 903 { 904 905 CRITICAL_ASSERT(curthread); 906 return (zpcpu_get(rms->pcpu)); 907 } 908 909 static struct rmslock_pcpu * 910 rms_int_remote_pcpu(struct rmslock *rms, int cpu) 911 { 912 913 return (zpcpu_get_cpu(rms->pcpu, cpu)); 914 } 915 916 static void 917 rms_int_influx_enter(struct rmslock *rms, struct rmslock_pcpu *pcpu) 918 { 919 920 CRITICAL_ASSERT(curthread); 921 MPASS(pcpu->influx == 0); 922 pcpu->influx = 1; 923 } 924 925 static void 926 rms_int_influx_exit(struct rmslock *rms, struct rmslock_pcpu *pcpu) 927 { 928 929 CRITICAL_ASSERT(curthread); 930 MPASS(pcpu->influx == 1); 931 pcpu->influx = 0; 932 } 933 934 #ifdef INVARIANTS 935 static void 936 rms_int_debug_readers_inc(struct rmslock *rms) 937 { 938 int old; 939 old = atomic_fetchadd_int(&rms->debug_readers, 1); 940 KASSERT(old >= 0, ("%s: bad readers count %d\n", __func__, old)); 941 } 942 943 static void 944 rms_int_debug_readers_dec(struct rmslock *rms) 945 { 946 int old; 947 948 old = atomic_fetchadd_int(&rms->debug_readers, -1); 949 KASSERT(old > 0, ("%s: bad readers count %d\n", __func__, old)); 950 } 951 #else 952 static void 953 rms_int_debug_readers_inc(struct rmslock *rms) 954 { 955 } 956 957 static void 958 rms_int_debug_readers_dec(struct rmslock *rms) 959 { 960 } 961 #endif 962 963 static void 964 rms_int_readers_inc(struct rmslock *rms, struct rmslock_pcpu *pcpu) 965 { 966 967 CRITICAL_ASSERT(curthread); 968 rms_int_debug_readers_inc(rms); 969 pcpu->readers++; 970 } 971 972 static void 973 rms_int_readers_dec(struct rmslock *rms, struct rmslock_pcpu *pcpu) 974 { 975 976 CRITICAL_ASSERT(curthread); 977 rms_int_debug_readers_dec(rms); 978 pcpu->readers--; 979 } 980 981 /* 982 * Public API 983 */ 984 void 985 rms_init(struct rmslock *rms, const char *name) 986 { 987 988 rms->owner = RMS_NOOWNER; 989 rms->writers = 0; 990 rms->readers = 0; 991 rms->debug_readers = 0; 992 mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW); 993 rms->pcpu = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK | M_ZERO); 994 } 995 996 void 997 rms_destroy(struct rmslock *rms) 998 { 999 1000 MPASS(rms->writers == 0); 1001 MPASS(rms->readers == 0); 1002 mtx_destroy(&rms->mtx); 1003 uma_zfree_pcpu(pcpu_zone_8, rms->pcpu); 1004 } 1005 1006 static void __noinline 1007 rms_rlock_fallback(struct rmslock *rms) 1008 { 1009 1010 rms_int_influx_exit(rms, rms_int_pcpu(rms)); 1011 critical_exit(); 1012 1013 mtx_lock(&rms->mtx); 1014 while (rms->writers > 0) 1015 msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0); 1016 critical_enter(); 1017 rms_int_readers_inc(rms, rms_int_pcpu(rms)); 1018 mtx_unlock(&rms->mtx); 1019 critical_exit(); 1020 TD_LOCKS_INC(curthread); 1021 } 1022 1023 void 1024 rms_rlock(struct rmslock *rms) 1025 { 1026 struct rmslock_pcpu *pcpu; 1027 1028 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 1029 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1030 1031 critical_enter(); 1032 pcpu = rms_int_pcpu(rms); 1033 rms_int_influx_enter(rms, pcpu); 1034 atomic_interrupt_fence(); 1035 if (__predict_false(rms->writers > 0)) { 1036 rms_rlock_fallback(rms); 1037 return; 1038 } 1039 atomic_interrupt_fence(); 1040 rms_int_readers_inc(rms, pcpu); 1041 atomic_interrupt_fence(); 1042 rms_int_influx_exit(rms, pcpu); 1043 critical_exit(); 1044 TD_LOCKS_INC(curthread); 1045 } 1046 1047 int 1048 rms_try_rlock(struct rmslock *rms) 1049 { 1050 struct rmslock_pcpu *pcpu; 1051 1052 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1053 1054 critical_enter(); 1055 pcpu = rms_int_pcpu(rms); 1056 rms_int_influx_enter(rms, pcpu); 1057 atomic_interrupt_fence(); 1058 if (__predict_false(rms->writers > 0)) { 1059 rms_int_influx_exit(rms, pcpu); 1060 critical_exit(); 1061 return (0); 1062 } 1063 atomic_interrupt_fence(); 1064 rms_int_readers_inc(rms, pcpu); 1065 atomic_interrupt_fence(); 1066 rms_int_influx_exit(rms, pcpu); 1067 critical_exit(); 1068 TD_LOCKS_INC(curthread); 1069 return (1); 1070 } 1071 1072 static void __noinline 1073 rms_runlock_fallback(struct rmslock *rms) 1074 { 1075 1076 rms_int_influx_exit(rms, rms_int_pcpu(rms)); 1077 critical_exit(); 1078 1079 mtx_lock(&rms->mtx); 1080 MPASS(rms->writers > 0); 1081 MPASS(rms->readers > 0); 1082 MPASS(rms->debug_readers == rms->readers); 1083 rms_int_debug_readers_dec(rms); 1084 rms->readers--; 1085 if (rms->readers == 0) 1086 wakeup_one(&rms->writers); 1087 mtx_unlock(&rms->mtx); 1088 TD_LOCKS_DEC(curthread); 1089 } 1090 1091 void 1092 rms_runlock(struct rmslock *rms) 1093 { 1094 struct rmslock_pcpu *pcpu; 1095 1096 critical_enter(); 1097 pcpu = rms_int_pcpu(rms); 1098 rms_int_influx_enter(rms, pcpu); 1099 atomic_interrupt_fence(); 1100 if (__predict_false(rms->writers > 0)) { 1101 rms_runlock_fallback(rms); 1102 return; 1103 } 1104 atomic_interrupt_fence(); 1105 rms_int_readers_dec(rms, pcpu); 1106 atomic_interrupt_fence(); 1107 rms_int_influx_exit(rms, pcpu); 1108 critical_exit(); 1109 TD_LOCKS_DEC(curthread); 1110 } 1111 1112 struct rmslock_ipi { 1113 struct rmslock *rms; 1114 struct smp_rendezvous_cpus_retry_arg srcra; 1115 }; 1116 1117 static void 1118 rms_action_func(void *arg) 1119 { 1120 struct rmslock_ipi *rmsipi; 1121 struct rmslock_pcpu *pcpu; 1122 struct rmslock *rms; 1123 1124 rmsipi = __containerof(arg, struct rmslock_ipi, srcra); 1125 rms = rmsipi->rms; 1126 pcpu = rms_int_pcpu(rms); 1127 1128 if (pcpu->influx) 1129 return; 1130 if (pcpu->readers != 0) { 1131 atomic_add_int(&rms->readers, pcpu->readers); 1132 pcpu->readers = 0; 1133 } 1134 smp_rendezvous_cpus_done(arg); 1135 } 1136 1137 static void 1138 rms_wait_func(void *arg, int cpu) 1139 { 1140 struct rmslock_ipi *rmsipi; 1141 struct rmslock_pcpu *pcpu; 1142 struct rmslock *rms; 1143 1144 rmsipi = __containerof(arg, struct rmslock_ipi, srcra); 1145 rms = rmsipi->rms; 1146 pcpu = rms_int_remote_pcpu(rms, cpu); 1147 1148 while (atomic_load_int(&pcpu->influx)) 1149 cpu_spinwait(); 1150 } 1151 1152 #ifdef INVARIANTS 1153 static void 1154 rms_assert_no_pcpu_readers(struct rmslock *rms) 1155 { 1156 struct rmslock_pcpu *pcpu; 1157 int cpu; 1158 1159 CPU_FOREACH(cpu) { 1160 pcpu = rms_int_remote_pcpu(rms, cpu); 1161 if (pcpu->readers != 0) { 1162 panic("%s: got %d readers on cpu %d\n", __func__, 1163 pcpu->readers, cpu); 1164 } 1165 } 1166 } 1167 #else 1168 static void 1169 rms_assert_no_pcpu_readers(struct rmslock *rms) 1170 { 1171 } 1172 #endif 1173 1174 static void 1175 rms_wlock_switch(struct rmslock *rms) 1176 { 1177 struct rmslock_ipi rmsipi; 1178 1179 MPASS(rms->readers == 0); 1180 MPASS(rms->writers == 1); 1181 1182 rmsipi.rms = rms; 1183 1184 smp_rendezvous_cpus_retry(all_cpus, 1185 smp_no_rendezvous_barrier, 1186 rms_action_func, 1187 smp_no_rendezvous_barrier, 1188 rms_wait_func, 1189 &rmsipi.srcra); 1190 } 1191 1192 void 1193 rms_wlock(struct rmslock *rms) 1194 { 1195 1196 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 1197 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1198 1199 mtx_lock(&rms->mtx); 1200 rms->writers++; 1201 if (rms->writers > 1) { 1202 msleep(&rms->owner, &rms->mtx, (PUSER - 1), 1203 mtx_name(&rms->mtx), 0); 1204 MPASS(rms->readers == 0); 1205 KASSERT(rms->owner == RMS_TRANSIENT, 1206 ("%s: unexpected owner value %p\n", __func__, 1207 rms->owner)); 1208 goto out_grab; 1209 } 1210 1211 KASSERT(rms->owner == RMS_NOOWNER, 1212 ("%s: unexpected owner value %p\n", __func__, rms->owner)); 1213 1214 rms_wlock_switch(rms); 1215 rms_assert_no_pcpu_readers(rms); 1216 1217 if (rms->readers > 0) { 1218 msleep(&rms->writers, &rms->mtx, (PUSER - 1), 1219 mtx_name(&rms->mtx), 0); 1220 } 1221 1222 out_grab: 1223 rms->owner = curthread; 1224 rms_assert_no_pcpu_readers(rms); 1225 mtx_unlock(&rms->mtx); 1226 MPASS(rms->readers == 0); 1227 TD_LOCKS_INC(curthread); 1228 } 1229 1230 void 1231 rms_wunlock(struct rmslock *rms) 1232 { 1233 1234 mtx_lock(&rms->mtx); 1235 KASSERT(rms->owner == curthread, 1236 ("%s: unexpected owner value %p\n", __func__, rms->owner)); 1237 MPASS(rms->writers >= 1); 1238 MPASS(rms->readers == 0); 1239 rms->writers--; 1240 if (rms->writers > 0) { 1241 wakeup_one(&rms->owner); 1242 rms->owner = RMS_TRANSIENT; 1243 } else { 1244 wakeup(&rms->readers); 1245 rms->owner = RMS_NOOWNER; 1246 } 1247 mtx_unlock(&rms->mtx); 1248 TD_LOCKS_DEC(curthread); 1249 } 1250 1251 void 1252 rms_unlock(struct rmslock *rms) 1253 { 1254 1255 if (rms_wowned(rms)) 1256 rms_wunlock(rms); 1257 else 1258 rms_runlock(rms); 1259 } 1260