/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>
#include <vm/uma.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * A cookie to mark destroyed rmlocks. This is stored in the head of
 * rm_activeReaders.
 */
#define	RM_DESTROYED	((void *)0xdead)

#define	rm_destroyed(rm)						\
	(LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED)

#define	RMPF_ONQUEUE	1
#define	RMPF_SIGNAL	2

#ifndef INVARIANTS
#define	_rm_assert(c, what, file, line)
#endif

static void	assert_rm(const struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_rm(const struct lock_object *lock);
#endif
static void	lock_rm(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rm(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rm(struct lock_object *lock);

struct lock_class lock_class_rm = {
	.lc_name = "rm",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
	.lc_assert = assert_rm,
#ifdef DDB
	.lc_ddb_show = db_show_rm,
#endif
	.lc_lock = lock_rm,
	.lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rm,
#endif
};

struct lock_class lock_class_rm_sleepable = {
	.lc_name = "sleepable rm",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE,
	.lc_assert = assert_rm,
#ifdef DDB
	.lc_ddb_show = db_show_rm,
#endif
	.lc_lock = lock_rm,
	.lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rm,
#endif
};

static void
assert_rm(const struct lock_object *lock, int what)
{

	rm_assert((const struct rmlock *)lock, what);
}

static void
lock_rm(struct lock_object *lock, uintptr_t how)
{
	struct rmlock *rm;
	struct rm_priotracker *tracker;

	rm = (struct rmlock *)lock;
	if (how == 0)
		rm_wlock(rm);
	else {
		tracker = (struct rm_priotracker *)how;
		rm_rlock(rm, tracker);
	}
}

static uintptr_t
unlock_rm(struct lock_object *lock)
{
	struct thread *td;
	struct pcpu *pc;
	struct rmlock *rm;
	struct rm_queue *queue;
	struct rm_priotracker *tracker;
	uintptr_t how;

	rm = (struct rmlock *)lock;
	tracker = NULL;
	how = 0;
	rm_assert(rm, RA_LOCKED | RA_NOTRECURSED);
	if (rm_wowned(rm))
		rm_wunlock(rm);
	else {
		/*
		 * Find the right rm_priotracker structure for curthread.
		 * The guarantee about its uniqueness is given by the fact
		 * we already asserted the lock wasn't recursively acquired.
		 */
		critical_enter();
		td = curthread;
		pc = get_pcpu();
		for (queue = pc->pc_rm_queue.rmq_next;
		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
			tracker = (struct rm_priotracker *)queue;
			if ((tracker->rmp_rmlock == rm) &&
			    (tracker->rmp_thread == td)) {
				how = (uintptr_t)tracker;
				break;
			}
		}
		KASSERT(tracker != NULL,
		    ("rm_priotracker is non-NULL when lock held in read mode"));
		critical_exit();
		rm_runlock(rm, tracker);
	}
	return (how);
}

#ifdef KDTRACE_HOOKS
static int
owner_rm(const struct lock_object *lock, struct thread **owner)
{
	const struct rmlock *rm;
	struct lock_class *lc;

	rm = (const struct rmlock *)lock;
	lc = LOCK_CLASS(&rm->rm_wlock_object);
	return (lc->lc_owner(&rm->rm_wlock_object, owner));
}
#endif

static struct mtx rm_spinlock;

MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN);

/*
 * Add or remove tracker from per-cpu list.
 *
 * The per-cpu list can be traversed at any time in forward direction from an
 * interrupt on the *local* cpu.
 */
static void inline
rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker)
{
	struct rm_queue *next;

	/* Initialize all tracker pointers */
	tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue;
	next = pc->pc_rm_queue.rmq_next;
	tracker->rmp_cpuQueue.rmq_next = next;

	/* rmq_prev is not used during forward traversal. */
	next->rmq_prev = &tracker->rmp_cpuQueue;

	/* Update pointer to first element. */
	pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
}

/*
 * Return a count of the number of trackers the thread 'td' already
 * has on this CPU for the lock 'rm'.
 */
static int
rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm,
    const struct thread *td)
{
	struct rm_queue *queue;
	struct rm_priotracker *tracker;
	int count;

	count = 0;
	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
	    queue = queue->rmq_next) {
		tracker = (struct rm_priotracker *)queue;
		if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td))
			count++;
	}
	return (count);
}

static void inline
rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
{
	struct rm_queue *next, *prev;

	next = tracker->rmp_cpuQueue.rmq_next;
	prev = tracker->rmp_cpuQueue.rmq_prev;

	/* Not used during forward traversal. */
	next->rmq_prev = prev;

	/* Remove from list. */
	prev->rmq_next = next;
}

static void
rm_cleanIPI(void *arg)
{
	struct pcpu *pc;
	struct rmlock *rm = arg;
	struct rm_priotracker *tracker;
	struct rm_queue *queue;
	pc = get_pcpu();

	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
	    queue = queue->rmq_next) {
		tracker = (struct rm_priotracker *)queue;
		if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) {
			tracker->rmp_flags = RMPF_ONQUEUE;
			mtx_lock_spin(&rm_spinlock);
			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
			    rmp_qentry);
			mtx_unlock_spin(&rm_spinlock);
		}
	}
}

void
rm_init_flags(struct rmlock *rm, const char *name, int opts)
{
	struct lock_class *lc;
	int liflags, xflags;

	liflags = 0;
	if (!(opts & RM_NOWITNESS))
		liflags |= LO_WITNESS;
	if (opts & RM_RECURSE)
		liflags |= LO_RECURSABLE;
	if (opts & RM_NEW)
		liflags |= LO_NEW;
	if (opts & RM_DUPOK)
		liflags |= LO_DUPOK;
	rm->rm_writecpus = all_cpus;
	LIST_INIT(&rm->rm_activeReaders);
	if (opts & RM_SLEEPABLE) {
		liflags |= LO_SLEEPABLE;
		lc = &lock_class_rm_sleepable;
		xflags = (opts & RM_NEW ? SX_NEW : 0);
		sx_init_flags(&rm->rm_lock_sx, "rmlock_sx",
		    xflags | SX_NOWITNESS);
	} else {
		lc = &lock_class_rm;
		xflags = (opts & RM_NEW ? MTX_NEW : 0);
		mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx",
		    xflags | MTX_NOWITNESS);
	}
	lock_init(&rm->lock_object, lc, name, NULL, liflags);
}
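
/*
 * Illustrative only (not compiled): a minimal sketch of how a consumer might
 * initialize an rmlock through rm_init_flags() above, assuming the wrapper
 * interface documented in rmlock(9).  The variable and function names are
 * hypothetical.
 *
 *	static struct rmlock example_lock;
 *
 *	static void
 *	example_setup(void)
 *	{
 *		/-* RM_RECURSE maps to LO_RECURSABLE above, so a thread that
 *		 * already read-holds the lock may acquire it again. *-/
 *		rm_init_flags(&example_lock, "example_lock", RM_RECURSE);
 *	}
 */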

void
rm_init(struct rmlock *rm, const char *name)
{

	rm_init_flags(rm, name, 0);
}

void
rm_destroy(struct rmlock *rm)
{

	rm_assert(rm, RA_UNLOCKED);
	LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED;
	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_destroy(&rm->rm_lock_sx);
	else
		mtx_destroy(&rm->rm_lock_mtx);
	lock_destroy(&rm->lock_object);
}

int
rm_wowned(const struct rmlock *rm)
{

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		return (sx_xlocked(&rm->rm_lock_sx));
	else
		return (mtx_owned(&rm->rm_lock_mtx));
}

void
rm_sysinit(void *arg)
{
	struct rm_args *args;

	args = arg;
	rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags);
}

static __noinline int
_rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
	struct pcpu *pc;

	critical_enter();
	pc = get_pcpu();

	/* Check if we just need to do a proper critical_exit. */
	if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) {
		critical_exit();
		return (1);
	}

	/* Remove our tracker from the per-cpu list. */
	rm_tracker_remove(pc, tracker);

	/*
	 * Check to see if the IPI granted us the lock after all. The load of
	 * rmp_flags must happen after the tracker is removed from the list.
	 */
	atomic_interrupt_fence();
	if (tracker->rmp_flags) {
		/* Just add back tracker - we hold the lock. */
		rm_tracker_add(pc, tracker);
		critical_exit();
		return (1);
	}

	/*
	 * We allow readers to acquire a lock even if a writer is blocked if
	 * the lock is recursive and the reader already holds the lock.
	 */
	if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		/*
		 * Just grant the lock if this thread already has a tracker
		 * for this lock on the per-cpu queue.
		 */
		if (rm_trackers_present(pc, rm, curthread) != 0) {
			mtx_lock_spin(&rm_spinlock);
			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
			    rmp_qentry);
			tracker->rmp_flags = RMPF_ONQUEUE;
			mtx_unlock_spin(&rm_spinlock);
			rm_tracker_add(pc, tracker);
			critical_exit();
			return (1);
		}
	}

	sched_unpin();
	critical_exit();

	if (trylock) {
		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
			if (!sx_try_xlock(&rm->rm_lock_sx))
				return (0);
		} else {
			if (!mtx_trylock(&rm->rm_lock_mtx))
				return (0);
		}
	} else {
		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
			THREAD_SLEEPING_OK();
			sx_xlock(&rm->rm_lock_sx);
			THREAD_NO_SLEEPING();
		} else
			mtx_lock(&rm->rm_lock_mtx);
	}

	critical_enter();
	pc = get_pcpu();
	CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus);
	rm_tracker_add(pc, tracker);
	sched_pin();
	critical_exit();

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xunlock(&rm->rm_lock_sx);
	else
		mtx_unlock(&rm->rm_lock_mtx);

	return (1);
}

int
_rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
	struct thread *td = curthread;
	struct pcpu *pc;

	if (SCHEDULER_STOPPED())
		return (1);

	tracker->rmp_flags = 0;
	tracker->rmp_thread = td;
	tracker->rmp_rmlock = rm;

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		THREAD_NO_SLEEPING();

	td->td_critnest++;	/* critical_enter(); */

	atomic_interrupt_fence();

	pc = cpuid_to_pcpu[td->td_oncpu];	/* pcpu_find(td->td_oncpu); */

	rm_tracker_add(pc, tracker);

	sched_pin();

	atomic_interrupt_fence();

	td->td_critnest--;

	/*
	 * Fast path to combine two common conditions into a single
	 * conditional jump.
	 */
	if (__predict_true(0 == (td->td_owepreempt |
	    CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus))))
		return (1);

	/* We do not have a read token and need to acquire one. */
	return _rm_rlock_hard(rm, tracker, trylock);
}
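
/*
 * Illustrative only (not compiled): _rm_rlock()/_rm_runlock() are normally
 * reached through the rm_rlock()/rm_runlock() wrappers described in
 * rmlock(9), with the caller supplying an rm_priotracker that must remain
 * valid for the whole read section, typically on the stack.  Names below
 * are hypothetical.
 *
 *	static struct rmlock example_lock;
 *
 *	static void
 *	example_reader(void)
 *	{
 *		struct rm_priotracker tracker;
 *
 *		rm_rlock(&example_lock, &tracker);
 *		/-* read shared data; 'tracker' must stay live until the
 *		 * matching unlock *-/
 *		rm_runlock(&example_lock, &tracker);
 *	}
 */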

static __noinline void
_rm_unlock_hard(struct thread *td, struct rm_priotracker *tracker)
{

	if (td->td_owepreempt) {
		td->td_critnest++;
		critical_exit();
	}

	if (!tracker->rmp_flags)
		return;

	mtx_lock_spin(&rm_spinlock);
	LIST_REMOVE(tracker, rmp_qentry);

	if (tracker->rmp_flags & RMPF_SIGNAL) {
		struct rmlock *rm;
		struct turnstile *ts;

		rm = tracker->rmp_rmlock;

		turnstile_chain_lock(&rm->lock_object);
		mtx_unlock_spin(&rm_spinlock);

		ts = turnstile_lookup(&rm->lock_object);

		turnstile_signal(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts);
		turnstile_chain_unlock(&rm->lock_object);
	} else
		mtx_unlock_spin(&rm_spinlock);
}

void
_rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker)
{
	struct pcpu *pc;
	struct thread *td = tracker->rmp_thread;

	if (SCHEDULER_STOPPED())
		return;

	td->td_critnest++;	/* critical_enter(); */
	pc = cpuid_to_pcpu[td->td_oncpu];	/* pcpu_find(td->td_oncpu); */
	rm_tracker_remove(pc, tracker);
	td->td_critnest--;
	sched_unpin();

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		THREAD_SLEEPING_OK();

	if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags)))
		return;

	_rm_unlock_hard(td, tracker);
}

void
_rm_wlock(struct rmlock *rm)
{
	struct rm_priotracker *prio;
	struct turnstile *ts;
	cpuset_t readcpus;

	if (SCHEDULER_STOPPED())
		return;

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xlock(&rm->rm_lock_sx);
	else
		mtx_lock(&rm->rm_lock_mtx);

	if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
		/* Get all read tokens back */
		CPU_ANDNOT(&readcpus, &all_cpus, &rm->rm_writecpus);
		rm->rm_writecpus = all_cpus;

		/*
		 * Assumes rm->rm_writecpus update is visible on other CPUs
		 * before rm_cleanIPI is called.
		 */
#ifdef SMP
		smp_rendezvous_cpus(readcpus,
		    smp_no_rendezvous_barrier,
		    rm_cleanIPI,
		    smp_no_rendezvous_barrier,
		    rm);

#else
		rm_cleanIPI(rm);
#endif

		mtx_lock_spin(&rm_spinlock);
		while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) {
			ts = turnstile_trywait(&rm->lock_object);
			prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL;
			mtx_unlock_spin(&rm_spinlock);
			turnstile_wait(ts, prio->rmp_thread,
			    TS_EXCLUSIVE_QUEUE);
			mtx_lock_spin(&rm_spinlock);
		}
		mtx_unlock_spin(&rm_spinlock);
	}
}

void
_rm_wunlock(struct rmlock *rm)
{

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xunlock(&rm->rm_lock_sx);
	else
		mtx_unlock(&rm->rm_lock_mtx);
}

#if LOCK_DEBUG > 0

void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
	    curthread, rm->lock_object.lo_name, file, line));
	KASSERT(!rm_destroyed(rm),
	    ("rm_wlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_UNLOCKED, file, line);

	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line, NULL);

	_rm_wlock(rm);

	LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(!rm_destroyed(rm),
	    ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
	_rm_wunlock(rm);
	TD_LOCKS_DEC(curthread);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return (1);

#ifdef INVARIANTS
	if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) {
		critical_enter();
		KASSERT(rm_trackers_present(get_pcpu(), rm,
		    curthread) == 0,
		    ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n",
		    rm->lock_object.lo_name, file, line));
		critical_exit();
	}
#endif
	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
	    curthread, rm->lock_object.lo_name, file, line));
	KASSERT(!rm_destroyed(rm),
	    ("rm_rlock() of destroyed rmlock @ %s:%d", file, line));
	if (!trylock) {
		KASSERT(!rm_wowned(rm),
		    ("rm_rlock: wlock already held for %s @ %s:%d",
		    rm->lock_object.lo_name, file, line));
		WITNESS_CHECKORDER(&rm->lock_object,
		    LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL);
	}

	if (_rm_rlock(rm, tracker, trylock)) {
		if (trylock)
			LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file,
			    line);
		else
			LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file,
			    line);
		WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line);
		TD_LOCKS_INC(curthread);
		return (1);
	} else if (trylock)
		LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line);

	return (0);
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(!rm_destroyed(rm),
	    ("rm_runlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
	_rm_runlock(rm, tracker);
	TD_LOCKS_DEC(curthread);
}

#else

/*
 * Just strip out file and line arguments if no lock debugging is enabled in
 * the kernel - we are called from a kernel module.
 */
void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

	_rm_wlock(rm);
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

	_rm_wunlock(rm);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

	return _rm_rlock(rm, tracker, trylock);
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

	_rm_runlock(rm, tracker);
}

#endif

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rm_assert
#endif

/*
 * Note that this does not need to use witness_assert() for read lock
 * assertions since an exact count of read locks held by this thread
 * is computable.
 */
void
_rm_assert(const struct rmlock *rm, int what, const char *file, int line)
{
	int count;

	if (SCHEDULER_STOPPED())
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
		/*
		 * Handle the write-locked case. Unlike other
		 * primitives, writers can never recurse.
		 */
		if (rm_wowned(rm)) {
			if (what & RA_RLOCKED)
				panic("Lock %s exclusively locked @ %s:%d\n",
				    rm->lock_object.lo_name, file, line);
			if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rm->lock_object.lo_name, file, line);
			break;
		}

		critical_enter();
		count = rm_trackers_present(get_pcpu(), rm, curthread);
		critical_exit();

		if (count == 0)
			panic("Lock %s not %slocked @ %s:%d\n",
			    rm->lock_object.lo_name, (what & RA_RLOCKED) ?
781 "read " : "", file, line); 782 if (count > 1) { 783 if (what & RA_NOTRECURSED) 784 panic("Lock %s recursed @ %s:%d\n", 785 rm->lock_object.lo_name, file, line); 786 } else if (what & RA_RECURSED) 787 panic("Lock %s not recursed @ %s:%d\n", 788 rm->lock_object.lo_name, file, line); 789 break; 790 case RA_WLOCKED: 791 if (!rm_wowned(rm)) 792 panic("Lock %s not exclusively locked @ %s:%d\n", 793 rm->lock_object.lo_name, file, line); 794 break; 795 case RA_UNLOCKED: 796 if (rm_wowned(rm)) 797 panic("Lock %s exclusively locked @ %s:%d\n", 798 rm->lock_object.lo_name, file, line); 799 800 critical_enter(); 801 count = rm_trackers_present(get_pcpu(), rm, curthread); 802 critical_exit(); 803 804 if (count != 0) 805 panic("Lock %s read locked @ %s:%d\n", 806 rm->lock_object.lo_name, file, line); 807 break; 808 default: 809 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 810 line); 811 } 812 } 813 #endif /* INVARIANT_SUPPORT */ 814 815 #ifdef DDB 816 static void 817 print_tracker(struct rm_priotracker *tr) 818 { 819 struct thread *td; 820 821 td = tr->rmp_thread; 822 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 823 td->td_proc->p_pid, td->td_name); 824 if (tr->rmp_flags & RMPF_ONQUEUE) { 825 db_printf("ONQUEUE"); 826 if (tr->rmp_flags & RMPF_SIGNAL) 827 db_printf(",SIGNAL"); 828 } else 829 db_printf("0"); 830 db_printf("}\n"); 831 } 832 833 static void 834 db_show_rm(const struct lock_object *lock) 835 { 836 struct rm_priotracker *tr; 837 struct rm_queue *queue; 838 const struct rmlock *rm; 839 struct lock_class *lc; 840 struct pcpu *pc; 841 842 rm = (const struct rmlock *)lock; 843 db_printf(" writecpus: "); 844 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 845 db_printf("\n"); 846 db_printf(" per-CPU readers:\n"); 847 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 848 for (queue = pc->pc_rm_queue.rmq_next; 849 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 850 tr = (struct rm_priotracker *)queue; 851 if (tr->rmp_rmlock == rm) 852 print_tracker(tr); 853 } 854 db_printf(" active readers:\n"); 855 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 856 print_tracker(tr); 857 lc = LOCK_CLASS(&rm->rm_wlock_object); 858 db_printf("Backing write-lock (%s):\n", lc->lc_name); 859 lc->lc_ddb_show(&rm->rm_wlock_object); 860 } 861 #endif 862 863 /* 864 * Read-mostly sleepable locks. 865 * 866 * These primitives allow both readers and writers to sleep. However, neither 867 * readers nor writers are tracked and subsequently there is no priority 868 * propagation. 869 * 870 * They are intended to be only used when write-locking is almost never needed 871 * (e.g., they can guard against unloading a kernel module) while read-locking 872 * happens all the time. 873 * 874 * Concurrent writers take turns taking the lock while going off cpu. If this is 875 * of concern for your usecase, this is not the right primitive. 876 * 877 * Neither rms_rlock nor rms_runlock use thread fences. Instead interrupt 878 * fences are inserted to ensure ordering with the code executed in the IPI 879 * handler. 880 * 881 * No attempt is made to track which CPUs read locked at least once, 882 * consequently write locking sends IPIs to all of them. This will become a 883 * problem at some point. The easiest way to lessen it is to provide a bitmap. 

#define	RMS_NOOWNER	((void *)0x1)
#define	RMS_TRANSIENT	((void *)0x2)
#define	RMS_FLAGMASK	0xf

struct rmslock_pcpu {
	int influx;
	int readers;
};

_Static_assert(sizeof(struct rmslock_pcpu) == 8, "bad size");

/*
 * Internal routines
 */
static struct rmslock_pcpu *
rms_int_pcpu(struct rmslock *rms)
{

	CRITICAL_ASSERT(curthread);
	return (zpcpu_get(rms->pcpu));
}

static struct rmslock_pcpu *
rms_int_remote_pcpu(struct rmslock *rms, int cpu)
{

	return (zpcpu_get_cpu(rms->pcpu, cpu));
}

static void
rms_int_influx_enter(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 0);
	pcpu->influx = 1;
}

static void
rms_int_influx_exit(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 1);
	pcpu->influx = 0;
}

#ifdef INVARIANTS
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
	int old;

	old = atomic_fetchadd_int(&rms->debug_readers, 1);
	KASSERT(old >= 0, ("%s: bad readers count %d\n", __func__, old));
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
	int old;

	old = atomic_fetchadd_int(&rms->debug_readers, -1);
	KASSERT(old > 0, ("%s: bad readers count %d\n", __func__, old));
}
#else
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
}
#endif

static void
rms_int_readers_inc(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_inc(rms);
	pcpu->readers++;
}

static void
rms_int_readers_dec(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_dec(rms);
	pcpu->readers--;
}

/*
 * Public API
 */
void
rms_init(struct rmslock *rms, const char *name)
{

	rms->owner = RMS_NOOWNER;
	rms->writers = 0;
	rms->readers = 0;
	rms->debug_readers = 0;
	mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
	rms->pcpu = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK | M_ZERO);
}

void
rms_destroy(struct rmslock *rms)
{

	MPASS(rms->writers == 0);
	MPASS(rms->readers == 0);
	mtx_destroy(&rms->mtx);
	uma_zfree_pcpu(pcpu_zone_8, rms->pcpu);
}

static void __noinline
rms_rlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	while (rms->writers > 0)
		msleep(&rms->readers, &rms->mtx, PUSER - 1,
		    mtx_name(&rms->mtx), 0);
	critical_enter();
	rms_int_readers_inc(rms, rms_int_pcpu(rms));
	mtx_unlock(&rms->mtx);
	critical_exit();
	TD_LOCKS_INC(curthread);
}

void
rms_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_rlock_fallback(rms);
		return;
	}
	atomic_interrupt_fence();
	rms_int_readers_inc(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_INC(curthread);
}

int
rms_try_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_int_influx_exit(rms, pcpu);
		critical_exit();
		return (0);
	}
	atomic_interrupt_fence();
	rms_int_readers_inc(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_INC(curthread);
	return (1);
}

static void __noinline
rms_runlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	MPASS(rms->writers > 0);
	MPASS(rms->readers > 0);
	MPASS(rms->debug_readers == rms->readers);
	rms_int_debug_readers_dec(rms);
	rms->readers--;
	if (rms->readers == 0)
		wakeup_one(&rms->writers);
	mtx_unlock(&rms->mtx);
	TD_LOCKS_DEC(curthread);
}

void
rms_runlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_runlock_fallback(rms);
		return;
	}
	atomic_interrupt_fence();
	rms_int_readers_dec(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_DEC(curthread);
}

struct rmslock_ipi {
	struct rmslock *rms;
	struct smp_rendezvous_cpus_retry_arg srcra;
};

static void
rms_action_func(void *arg)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_pcpu(rms);

	if (pcpu->influx)
		return;
	if (pcpu->readers != 0) {
		atomic_add_int(&rms->readers, pcpu->readers);
		pcpu->readers = 0;
	}
	smp_rendezvous_cpus_done(arg);
}

static void
rms_wait_func(void *arg, int cpu)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_remote_pcpu(rms, cpu);

	while (atomic_load_int(&pcpu->influx))
		cpu_spinwait();
}

#ifdef INVARIANTS
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;
	int cpu;

	CPU_FOREACH(cpu) {
		pcpu = rms_int_remote_pcpu(rms, cpu);
		if (pcpu->readers != 0) {
			panic("%s: got %d readers on cpu %d\n", __func__,
			    pcpu->readers, cpu);
		}
	}
}
#else
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
}
#endif

static void
rms_wlock_switch(struct rmslock *rms)
{
	struct rmslock_ipi rmsipi;

	MPASS(rms->readers == 0);
	MPASS(rms->writers == 1);

	rmsipi.rms = rms;

	smp_rendezvous_cpus_retry(all_cpus,
	    smp_no_rendezvous_barrier,
	    rms_action_func,
	    smp_no_rendezvous_barrier,
	    rms_wait_func,
	    &rmsipi.srcra);
}

void
rms_wlock(struct rmslock *rms)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	mtx_lock(&rms->mtx);
	rms->writers++;
	if (rms->writers > 1) {
		msleep(&rms->owner, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
		MPASS(rms->readers == 0);
		KASSERT(rms->owner == RMS_TRANSIENT,
		    ("%s: unexpected owner value %p\n", __func__,
		    rms->owner));
		goto out_grab;
	}

	KASSERT(rms->owner == RMS_NOOWNER,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));

	rms_wlock_switch(rms);
	rms_assert_no_pcpu_readers(rms);

	if (rms->readers > 0) {
		msleep(&rms->writers, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
	}

out_grab:
	rms->owner = curthread;
	rms_assert_no_pcpu_readers(rms);
	mtx_unlock(&rms->mtx);
	MPASS(rms->readers == 0);
	TD_LOCKS_INC(curthread);
}

void
rms_wunlock(struct rmslock *rms)
{

	mtx_lock(&rms->mtx);
	KASSERT(rms->owner == curthread,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));
	MPASS(rms->writers >= 1);
	MPASS(rms->readers == 0);
	rms->writers--;
	if (rms->writers > 0) {
		wakeup_one(&rms->owner);
		rms->owner = RMS_TRANSIENT;
	} else {
		wakeup(&rms->readers);
		rms->owner = RMS_NOOWNER;
	}
	mtx_unlock(&rms->mtx);
	TD_LOCKS_DEC(curthread);
}

void
rms_unlock(struct rmslock *rms)
{

	if (rms_wowned(rms))
		rms_wunlock(rms);
	else
		rms_runlock(rms);
}