1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Machine independent bits of reader/writer lock implementation. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ddb.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 44 #include <sys/kernel.h> 45 #include <sys/kdb.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/proc.h> 50 #include <sys/rmlock.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/turnstile.h> 54 #include <sys/lock_profile.h> 55 #include <machine/cpu.h> 56 #include <vm/uma.h> 57 58 #ifdef DDB 59 #include <ddb/ddb.h> 60 #endif 61 62 /* 63 * A cookie to mark destroyed rmlocks. This is stored in the head of 64 * rm_activeReaders. 65 */ 66 #define RM_DESTROYED ((void *)0xdead) 67 68 #define rm_destroyed(rm) \ 69 (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED) 70 71 #define RMPF_ONQUEUE 1 72 #define RMPF_SIGNAL 2 73 74 #ifndef INVARIANTS 75 #define _rm_assert(c, what, file, line) 76 #endif 77 78 static void assert_rm(const struct lock_object *lock, int what); 79 #ifdef DDB 80 static void db_show_rm(const struct lock_object *lock); 81 #endif 82 static void lock_rm(struct lock_object *lock, uintptr_t how); 83 #ifdef KDTRACE_HOOKS 84 static int owner_rm(const struct lock_object *lock, struct thread **owner); 85 #endif 86 static uintptr_t unlock_rm(struct lock_object *lock); 87 88 struct lock_class lock_class_rm = { 89 .lc_name = "rm", 90 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, 91 .lc_assert = assert_rm, 92 #ifdef DDB 93 .lc_ddb_show = db_show_rm, 94 #endif 95 .lc_lock = lock_rm, 96 .lc_unlock = unlock_rm, 97 #ifdef KDTRACE_HOOKS 98 .lc_owner = owner_rm, 99 #endif 100 }; 101 102 struct lock_class lock_class_rm_sleepable = { 103 .lc_name = "sleepable rm", 104 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE, 105 .lc_assert = assert_rm, 106 #ifdef DDB 107 .lc_ddb_show = db_show_rm, 108 #endif 109 .lc_lock = lock_rm, 110 .lc_unlock = unlock_rm, 111 #ifdef KDTRACE_HOOKS 112 .lc_owner = owner_rm, 113 #endif 114 }; 115 116 static void 117 assert_rm(const struct lock_object *lock, int what) 118 { 119 120 rm_assert((const struct rmlock *)lock, what); 121 } 122 123 static void 124 lock_rm(struct lock_object *lock, uintptr_t how) 125 { 126 struct rmlock *rm; 127 struct rm_priotracker *tracker; 128 129 rm = (struct rmlock *)lock; 130 if (how == 0) 131 rm_wlock(rm); 132 else { 133 tracker = (struct rm_priotracker *)how; 134 rm_rlock(rm, tracker); 135 } 136 } 137 138 static uintptr_t 139 unlock_rm(struct lock_object *lock) 140 { 141 struct thread *td; 142 struct pcpu *pc; 143 struct rmlock *rm; 144 struct rm_queue *queue; 145 struct rm_priotracker *tracker; 146 uintptr_t how; 147 148 rm = (struct rmlock *)lock; 149 tracker = NULL; 150 how = 0; 151 rm_assert(rm, RA_LOCKED | RA_NOTRECURSED); 152 if (rm_wowned(rm)) 153 rm_wunlock(rm); 154 else { 155 /* 156 * Find the right rm_priotracker structure for curthread. 157 * The guarantee about its uniqueness is given by the fact 158 * we already asserted the lock wasn't recursively acquired. 159 */ 160 critical_enter(); 161 td = curthread; 162 pc = get_pcpu(); 163 for (queue = pc->pc_rm_queue.rmq_next; 164 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 165 tracker = (struct rm_priotracker *)queue; 166 if ((tracker->rmp_rmlock == rm) && 167 (tracker->rmp_thread == td)) { 168 how = (uintptr_t)tracker; 169 break; 170 } 171 } 172 KASSERT(tracker != NULL, 173 ("rm_priotracker is non-NULL when lock held in read mode")); 174 critical_exit(); 175 rm_runlock(rm, tracker); 176 } 177 return (how); 178 } 179 180 #ifdef KDTRACE_HOOKS 181 static int 182 owner_rm(const struct lock_object *lock, struct thread **owner) 183 { 184 const struct rmlock *rm; 185 struct lock_class *lc; 186 187 rm = (const struct rmlock *)lock; 188 lc = LOCK_CLASS(&rm->rm_wlock_object); 189 return (lc->lc_owner(&rm->rm_wlock_object, owner)); 190 } 191 #endif 192 193 static struct mtx rm_spinlock; 194 195 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); 196 197 /* 198 * Add or remove tracker from per-cpu list. 199 * 200 * The per-cpu list can be traversed at any time in forward direction from an 201 * interrupt on the *local* cpu. 202 */ 203 static void inline 204 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker) 205 { 206 struct rm_queue *next; 207 208 /* Initialize all tracker pointers */ 209 tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; 210 next = pc->pc_rm_queue.rmq_next; 211 tracker->rmp_cpuQueue.rmq_next = next; 212 213 /* rmq_prev is not used during froward traversal. */ 214 next->rmq_prev = &tracker->rmp_cpuQueue; 215 216 /* Update pointer to first element. */ 217 pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; 218 } 219 220 /* 221 * Return a count of the number of trackers the thread 'td' already 222 * has on this CPU for the lock 'rm'. 223 */ 224 static int 225 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm, 226 const struct thread *td) 227 { 228 struct rm_queue *queue; 229 struct rm_priotracker *tracker; 230 int count; 231 232 count = 0; 233 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 234 queue = queue->rmq_next) { 235 tracker = (struct rm_priotracker *)queue; 236 if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td)) 237 count++; 238 } 239 return (count); 240 } 241 242 static void inline 243 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker) 244 { 245 struct rm_queue *next, *prev; 246 247 next = tracker->rmp_cpuQueue.rmq_next; 248 prev = tracker->rmp_cpuQueue.rmq_prev; 249 250 /* Not used during forward traversal. */ 251 next->rmq_prev = prev; 252 253 /* Remove from list. */ 254 prev->rmq_next = next; 255 } 256 257 static void 258 rm_cleanIPI(void *arg) 259 { 260 struct pcpu *pc; 261 struct rmlock *rm = arg; 262 struct rm_priotracker *tracker; 263 struct rm_queue *queue; 264 pc = get_pcpu(); 265 266 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 267 queue = queue->rmq_next) { 268 tracker = (struct rm_priotracker *)queue; 269 if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) { 270 tracker->rmp_flags = RMPF_ONQUEUE; 271 mtx_lock_spin(&rm_spinlock); 272 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 273 rmp_qentry); 274 mtx_unlock_spin(&rm_spinlock); 275 } 276 } 277 } 278 279 void 280 rm_init_flags(struct rmlock *rm, const char *name, int opts) 281 { 282 struct lock_class *lc; 283 int liflags, xflags; 284 285 liflags = 0; 286 if (!(opts & RM_NOWITNESS)) 287 liflags |= LO_WITNESS; 288 if (opts & RM_RECURSE) 289 liflags |= LO_RECURSABLE; 290 if (opts & RM_NEW) 291 liflags |= LO_NEW; 292 rm->rm_writecpus = all_cpus; 293 LIST_INIT(&rm->rm_activeReaders); 294 if (opts & RM_SLEEPABLE) { 295 liflags |= LO_SLEEPABLE; 296 lc = &lock_class_rm_sleepable; 297 xflags = (opts & RM_NEW ? SX_NEW : 0); 298 sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", 299 xflags | SX_NOWITNESS); 300 } else { 301 lc = &lock_class_rm; 302 xflags = (opts & RM_NEW ? MTX_NEW : 0); 303 mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", 304 xflags | MTX_NOWITNESS); 305 } 306 lock_init(&rm->lock_object, lc, name, NULL, liflags); 307 } 308 309 void 310 rm_init(struct rmlock *rm, const char *name) 311 { 312 313 rm_init_flags(rm, name, 0); 314 } 315 316 void 317 rm_destroy(struct rmlock *rm) 318 { 319 320 rm_assert(rm, RA_UNLOCKED); 321 LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED; 322 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 323 sx_destroy(&rm->rm_lock_sx); 324 else 325 mtx_destroy(&rm->rm_lock_mtx); 326 lock_destroy(&rm->lock_object); 327 } 328 329 int 330 rm_wowned(const struct rmlock *rm) 331 { 332 333 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 334 return (sx_xlocked(&rm->rm_lock_sx)); 335 else 336 return (mtx_owned(&rm->rm_lock_mtx)); 337 } 338 339 void 340 rm_sysinit(void *arg) 341 { 342 struct rm_args *args; 343 344 args = arg; 345 rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); 346 } 347 348 static __noinline int 349 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 350 { 351 struct pcpu *pc; 352 353 critical_enter(); 354 pc = get_pcpu(); 355 356 /* Check if we just need to do a proper critical_exit. */ 357 if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) { 358 critical_exit(); 359 return (1); 360 } 361 362 /* Remove our tracker from the per-cpu list. */ 363 rm_tracker_remove(pc, tracker); 364 365 /* 366 * Check to see if the IPI granted us the lock after all. The load of 367 * rmp_flags must happen after the tracker is removed from the list. 368 */ 369 atomic_interrupt_fence(); 370 if (tracker->rmp_flags) { 371 /* Just add back tracker - we hold the lock. */ 372 rm_tracker_add(pc, tracker); 373 critical_exit(); 374 return (1); 375 } 376 377 /* 378 * We allow readers to acquire a lock even if a writer is blocked if 379 * the lock is recursive and the reader already holds the lock. 380 */ 381 if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { 382 /* 383 * Just grant the lock if this thread already has a tracker 384 * for this lock on the per-cpu queue. 385 */ 386 if (rm_trackers_present(pc, rm, curthread) != 0) { 387 mtx_lock_spin(&rm_spinlock); 388 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 389 rmp_qentry); 390 tracker->rmp_flags = RMPF_ONQUEUE; 391 mtx_unlock_spin(&rm_spinlock); 392 rm_tracker_add(pc, tracker); 393 critical_exit(); 394 return (1); 395 } 396 } 397 398 sched_unpin(); 399 critical_exit(); 400 401 if (trylock) { 402 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 403 if (!sx_try_xlock(&rm->rm_lock_sx)) 404 return (0); 405 } else { 406 if (!mtx_trylock(&rm->rm_lock_mtx)) 407 return (0); 408 } 409 } else { 410 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 411 THREAD_SLEEPING_OK(); 412 sx_xlock(&rm->rm_lock_sx); 413 THREAD_NO_SLEEPING(); 414 } else 415 mtx_lock(&rm->rm_lock_mtx); 416 } 417 418 critical_enter(); 419 pc = get_pcpu(); 420 CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus); 421 rm_tracker_add(pc, tracker); 422 sched_pin(); 423 critical_exit(); 424 425 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 426 sx_xunlock(&rm->rm_lock_sx); 427 else 428 mtx_unlock(&rm->rm_lock_mtx); 429 430 return (1); 431 } 432 433 int 434 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 435 { 436 struct thread *td = curthread; 437 struct pcpu *pc; 438 439 if (SCHEDULER_STOPPED()) 440 return (1); 441 442 tracker->rmp_flags = 0; 443 tracker->rmp_thread = td; 444 tracker->rmp_rmlock = rm; 445 446 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 447 THREAD_NO_SLEEPING(); 448 449 td->td_critnest++; /* critical_enter(); */ 450 451 atomic_interrupt_fence(); 452 453 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 454 455 rm_tracker_add(pc, tracker); 456 457 sched_pin(); 458 459 atomic_interrupt_fence(); 460 461 td->td_critnest--; 462 463 /* 464 * Fast path to combine two common conditions into a single 465 * conditional jump. 466 */ 467 if (__predict_true(0 == (td->td_owepreempt | 468 CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))) 469 return (1); 470 471 /* We do not have a read token and need to acquire one. */ 472 return _rm_rlock_hard(rm, tracker, trylock); 473 } 474 475 static __noinline void 476 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker) 477 { 478 479 if (td->td_owepreempt) { 480 td->td_critnest++; 481 critical_exit(); 482 } 483 484 if (!tracker->rmp_flags) 485 return; 486 487 mtx_lock_spin(&rm_spinlock); 488 LIST_REMOVE(tracker, rmp_qentry); 489 490 if (tracker->rmp_flags & RMPF_SIGNAL) { 491 struct rmlock *rm; 492 struct turnstile *ts; 493 494 rm = tracker->rmp_rmlock; 495 496 turnstile_chain_lock(&rm->lock_object); 497 mtx_unlock_spin(&rm_spinlock); 498 499 ts = turnstile_lookup(&rm->lock_object); 500 501 turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); 502 turnstile_unpend(ts); 503 turnstile_chain_unlock(&rm->lock_object); 504 } else 505 mtx_unlock_spin(&rm_spinlock); 506 } 507 508 void 509 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) 510 { 511 struct pcpu *pc; 512 struct thread *td = tracker->rmp_thread; 513 514 if (SCHEDULER_STOPPED()) 515 return; 516 517 td->td_critnest++; /* critical_enter(); */ 518 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 519 rm_tracker_remove(pc, tracker); 520 td->td_critnest--; 521 sched_unpin(); 522 523 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 524 THREAD_SLEEPING_OK(); 525 526 if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags))) 527 return; 528 529 _rm_unlock_hard(td, tracker); 530 } 531 532 void 533 _rm_wlock(struct rmlock *rm) 534 { 535 struct rm_priotracker *prio; 536 struct turnstile *ts; 537 cpuset_t readcpus; 538 539 if (SCHEDULER_STOPPED()) 540 return; 541 542 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 543 sx_xlock(&rm->rm_lock_sx); 544 else 545 mtx_lock(&rm->rm_lock_mtx); 546 547 if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { 548 /* Get all read tokens back */ 549 readcpus = all_cpus; 550 CPU_ANDNOT(&readcpus, &rm->rm_writecpus); 551 rm->rm_writecpus = all_cpus; 552 553 /* 554 * Assumes rm->rm_writecpus update is visible on other CPUs 555 * before rm_cleanIPI is called. 556 */ 557 #ifdef SMP 558 smp_rendezvous_cpus(readcpus, 559 smp_no_rendezvous_barrier, 560 rm_cleanIPI, 561 smp_no_rendezvous_barrier, 562 rm); 563 564 #else 565 rm_cleanIPI(rm); 566 #endif 567 568 mtx_lock_spin(&rm_spinlock); 569 while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { 570 ts = turnstile_trywait(&rm->lock_object); 571 prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; 572 mtx_unlock_spin(&rm_spinlock); 573 turnstile_wait(ts, prio->rmp_thread, 574 TS_EXCLUSIVE_QUEUE); 575 mtx_lock_spin(&rm_spinlock); 576 } 577 mtx_unlock_spin(&rm_spinlock); 578 } 579 } 580 581 void 582 _rm_wunlock(struct rmlock *rm) 583 { 584 585 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 586 sx_xunlock(&rm->rm_lock_sx); 587 else 588 mtx_unlock(&rm->rm_lock_mtx); 589 } 590 591 #if LOCK_DEBUG > 0 592 593 void 594 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 595 { 596 597 if (SCHEDULER_STOPPED()) 598 return; 599 600 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 601 ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d", 602 curthread, rm->lock_object.lo_name, file, line)); 603 KASSERT(!rm_destroyed(rm), 604 ("rm_wlock() of destroyed rmlock @ %s:%d", file, line)); 605 _rm_assert(rm, RA_UNLOCKED, file, line); 606 607 WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, 608 file, line, NULL); 609 610 _rm_wlock(rm); 611 612 LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); 613 WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 614 TD_LOCKS_INC(curthread); 615 } 616 617 void 618 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 619 { 620 621 if (SCHEDULER_STOPPED()) 622 return; 623 624 KASSERT(!rm_destroyed(rm), 625 ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line)); 626 _rm_assert(rm, RA_WLOCKED, file, line); 627 WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 628 LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); 629 _rm_wunlock(rm); 630 TD_LOCKS_DEC(curthread); 631 } 632 633 int 634 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 635 int trylock, const char *file, int line) 636 { 637 638 if (SCHEDULER_STOPPED()) 639 return (1); 640 641 #ifdef INVARIANTS 642 if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) { 643 critical_enter(); 644 KASSERT(rm_trackers_present(get_pcpu(), rm, 645 curthread) == 0, 646 ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n", 647 rm->lock_object.lo_name, file, line)); 648 critical_exit(); 649 } 650 #endif 651 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 652 ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d", 653 curthread, rm->lock_object.lo_name, file, line)); 654 KASSERT(!rm_destroyed(rm), 655 ("rm_rlock() of destroyed rmlock @ %s:%d", file, line)); 656 if (!trylock) { 657 KASSERT(!rm_wowned(rm), 658 ("rm_rlock: wlock already held for %s @ %s:%d", 659 rm->lock_object.lo_name, file, line)); 660 WITNESS_CHECKORDER(&rm->lock_object, 661 LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL); 662 } 663 664 if (_rm_rlock(rm, tracker, trylock)) { 665 if (trylock) 666 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file, 667 line); 668 else 669 LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, 670 line); 671 WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line); 672 TD_LOCKS_INC(curthread); 673 return (1); 674 } else if (trylock) 675 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line); 676 677 return (0); 678 } 679 680 void 681 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 682 const char *file, int line) 683 { 684 685 if (SCHEDULER_STOPPED()) 686 return; 687 688 KASSERT(!rm_destroyed(rm), 689 ("rm_runlock() of destroyed rmlock @ %s:%d", file, line)); 690 _rm_assert(rm, RA_RLOCKED, file, line); 691 WITNESS_UNLOCK(&rm->lock_object, 0, file, line); 692 LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); 693 _rm_runlock(rm, tracker); 694 TD_LOCKS_DEC(curthread); 695 } 696 697 #else 698 699 /* 700 * Just strip out file and line arguments if no lock debugging is enabled in 701 * the kernel - we are called from a kernel module. 702 */ 703 void 704 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 705 { 706 707 _rm_wlock(rm); 708 } 709 710 void 711 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 712 { 713 714 _rm_wunlock(rm); 715 } 716 717 int 718 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 719 int trylock, const char *file, int line) 720 { 721 722 return _rm_rlock(rm, tracker, trylock); 723 } 724 725 void 726 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 727 const char *file, int line) 728 { 729 730 _rm_runlock(rm, tracker); 731 } 732 733 #endif 734 735 #ifdef INVARIANT_SUPPORT 736 #ifndef INVARIANTS 737 #undef _rm_assert 738 #endif 739 740 /* 741 * Note that this does not need to use witness_assert() for read lock 742 * assertions since an exact count of read locks held by this thread 743 * is computable. 744 */ 745 void 746 _rm_assert(const struct rmlock *rm, int what, const char *file, int line) 747 { 748 int count; 749 750 if (SCHEDULER_STOPPED()) 751 return; 752 switch (what) { 753 case RA_LOCKED: 754 case RA_LOCKED | RA_RECURSED: 755 case RA_LOCKED | RA_NOTRECURSED: 756 case RA_RLOCKED: 757 case RA_RLOCKED | RA_RECURSED: 758 case RA_RLOCKED | RA_NOTRECURSED: 759 /* 760 * Handle the write-locked case. Unlike other 761 * primitives, writers can never recurse. 762 */ 763 if (rm_wowned(rm)) { 764 if (what & RA_RLOCKED) 765 panic("Lock %s exclusively locked @ %s:%d\n", 766 rm->lock_object.lo_name, file, line); 767 if (what & RA_RECURSED) 768 panic("Lock %s not recursed @ %s:%d\n", 769 rm->lock_object.lo_name, file, line); 770 break; 771 } 772 773 critical_enter(); 774 count = rm_trackers_present(get_pcpu(), rm, curthread); 775 critical_exit(); 776 777 if (count == 0) 778 panic("Lock %s not %slocked @ %s:%d\n", 779 rm->lock_object.lo_name, (what & RA_RLOCKED) ? 780 "read " : "", file, line); 781 if (count > 1) { 782 if (what & RA_NOTRECURSED) 783 panic("Lock %s recursed @ %s:%d\n", 784 rm->lock_object.lo_name, file, line); 785 } else if (what & RA_RECURSED) 786 panic("Lock %s not recursed @ %s:%d\n", 787 rm->lock_object.lo_name, file, line); 788 break; 789 case RA_WLOCKED: 790 if (!rm_wowned(rm)) 791 panic("Lock %s not exclusively locked @ %s:%d\n", 792 rm->lock_object.lo_name, file, line); 793 break; 794 case RA_UNLOCKED: 795 if (rm_wowned(rm)) 796 panic("Lock %s exclusively locked @ %s:%d\n", 797 rm->lock_object.lo_name, file, line); 798 799 critical_enter(); 800 count = rm_trackers_present(get_pcpu(), rm, curthread); 801 critical_exit(); 802 803 if (count != 0) 804 panic("Lock %s read locked @ %s:%d\n", 805 rm->lock_object.lo_name, file, line); 806 break; 807 default: 808 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 809 line); 810 } 811 } 812 #endif /* INVARIANT_SUPPORT */ 813 814 #ifdef DDB 815 static void 816 print_tracker(struct rm_priotracker *tr) 817 { 818 struct thread *td; 819 820 td = tr->rmp_thread; 821 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 822 td->td_proc->p_pid, td->td_name); 823 if (tr->rmp_flags & RMPF_ONQUEUE) { 824 db_printf("ONQUEUE"); 825 if (tr->rmp_flags & RMPF_SIGNAL) 826 db_printf(",SIGNAL"); 827 } else 828 db_printf("0"); 829 db_printf("}\n"); 830 } 831 832 static void 833 db_show_rm(const struct lock_object *lock) 834 { 835 struct rm_priotracker *tr; 836 struct rm_queue *queue; 837 const struct rmlock *rm; 838 struct lock_class *lc; 839 struct pcpu *pc; 840 841 rm = (const struct rmlock *)lock; 842 db_printf(" writecpus: "); 843 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 844 db_printf("\n"); 845 db_printf(" per-CPU readers:\n"); 846 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 847 for (queue = pc->pc_rm_queue.rmq_next; 848 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 849 tr = (struct rm_priotracker *)queue; 850 if (tr->rmp_rmlock == rm) 851 print_tracker(tr); 852 } 853 db_printf(" active readers:\n"); 854 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 855 print_tracker(tr); 856 lc = LOCK_CLASS(&rm->rm_wlock_object); 857 db_printf("Backing write-lock (%s):\n", lc->lc_name); 858 lc->lc_ddb_show(&rm->rm_wlock_object); 859 } 860 #endif 861 862 /* 863 * Read-mostly sleepable locks. 864 * 865 * These primitives allow both readers and writers to sleep. However, neither 866 * readers nor writers are tracked and subsequently there is no priority 867 * propagation. 868 * 869 * They are intended to be only used when write-locking is almost never needed 870 * (e.g., they can guard against unloading a kernel module) while read-locking 871 * happens all the time. 872 * 873 * Concurrent writers take turns taking the lock while going off cpu. If this is 874 * of concern for your usecase, this is not the right primitive. 875 * 876 * Neither rms_rlock nor rms_runlock use thread fences. Instead interrupt 877 * fences are inserted to ensure ordering with the code executed in the IPI 878 * handler. 879 * 880 * No attempt is made to track which CPUs read locked at least once, 881 * consequently write locking sends IPIs to all of them. This will become a 882 * problem at some point. The easiest way to lessen it is to provide a bitmap. 883 */ 884 885 #define RMS_NOOWNER ((void *)0x1) 886 #define RMS_TRANSIENT ((void *)0x2) 887 #define RMS_FLAGMASK 0xf 888 889 struct rmslock_pcpu { 890 int influx; 891 int readers; 892 }; 893 894 _Static_assert(sizeof(struct rmslock_pcpu) == 8, "bad size"); 895 896 /* 897 * Internal routines 898 */ 899 static struct rmslock_pcpu * 900 rms_int_pcpu(struct rmslock *rms) 901 { 902 903 CRITICAL_ASSERT(curthread); 904 return (zpcpu_get(rms->pcpu)); 905 } 906 907 static struct rmslock_pcpu * 908 rms_int_remote_pcpu(struct rmslock *rms, int cpu) 909 { 910 911 return (zpcpu_get_cpu(rms->pcpu, cpu)); 912 } 913 914 static void 915 rms_int_influx_enter(struct rmslock *rms, struct rmslock_pcpu *pcpu) 916 { 917 918 CRITICAL_ASSERT(curthread); 919 MPASS(pcpu->influx == 0); 920 pcpu->influx = 1; 921 } 922 923 static void 924 rms_int_influx_exit(struct rmslock *rms, struct rmslock_pcpu *pcpu) 925 { 926 927 CRITICAL_ASSERT(curthread); 928 MPASS(pcpu->influx == 1); 929 pcpu->influx = 0; 930 } 931 932 #ifdef INVARIANTS 933 static void 934 rms_int_debug_readers_inc(struct rmslock *rms) 935 { 936 int old; 937 old = atomic_fetchadd_int(&rms->debug_readers, 1); 938 KASSERT(old >= 0, ("%s: bad readers count %d\n", __func__, old)); 939 } 940 941 static void 942 rms_int_debug_readers_dec(struct rmslock *rms) 943 { 944 int old; 945 946 old = atomic_fetchadd_int(&rms->debug_readers, -1); 947 KASSERT(old > 0, ("%s: bad readers count %d\n", __func__, old)); 948 } 949 #else 950 static void 951 rms_int_debug_readers_inc(struct rmslock *rms) 952 { 953 } 954 955 static void 956 rms_int_debug_readers_dec(struct rmslock *rms) 957 { 958 } 959 #endif 960 961 static void 962 rms_int_readers_inc(struct rmslock *rms, struct rmslock_pcpu *pcpu) 963 { 964 965 CRITICAL_ASSERT(curthread); 966 rms_int_debug_readers_inc(rms); 967 pcpu->readers++; 968 } 969 970 static void 971 rms_int_readers_dec(struct rmslock *rms, struct rmslock_pcpu *pcpu) 972 { 973 974 CRITICAL_ASSERT(curthread); 975 rms_int_debug_readers_dec(rms); 976 pcpu->readers--; 977 } 978 979 /* 980 * Public API 981 */ 982 void 983 rms_init(struct rmslock *rms, const char *name) 984 { 985 986 rms->owner = RMS_NOOWNER; 987 rms->writers = 0; 988 rms->readers = 0; 989 rms->debug_readers = 0; 990 mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW); 991 rms->pcpu = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK | M_ZERO); 992 } 993 994 void 995 rms_destroy(struct rmslock *rms) 996 { 997 998 MPASS(rms->writers == 0); 999 MPASS(rms->readers == 0); 1000 mtx_destroy(&rms->mtx); 1001 uma_zfree_pcpu(pcpu_zone_8, rms->pcpu); 1002 } 1003 1004 static void __noinline 1005 rms_rlock_fallback(struct rmslock *rms) 1006 { 1007 1008 rms_int_influx_exit(rms, rms_int_pcpu(rms)); 1009 critical_exit(); 1010 1011 mtx_lock(&rms->mtx); 1012 while (rms->writers > 0) 1013 msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0); 1014 critical_enter(); 1015 rms_int_readers_inc(rms, rms_int_pcpu(rms)); 1016 mtx_unlock(&rms->mtx); 1017 critical_exit(); 1018 } 1019 1020 void 1021 rms_rlock(struct rmslock *rms) 1022 { 1023 struct rmslock_pcpu *pcpu; 1024 1025 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 1026 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1027 1028 critical_enter(); 1029 pcpu = rms_int_pcpu(rms); 1030 rms_int_influx_enter(rms, pcpu); 1031 atomic_interrupt_fence(); 1032 if (__predict_false(rms->writers > 0)) { 1033 rms_rlock_fallback(rms); 1034 return; 1035 } 1036 atomic_interrupt_fence(); 1037 rms_int_readers_inc(rms, pcpu); 1038 atomic_interrupt_fence(); 1039 rms_int_influx_exit(rms, pcpu); 1040 critical_exit(); 1041 } 1042 1043 int 1044 rms_try_rlock(struct rmslock *rms) 1045 { 1046 struct rmslock_pcpu *pcpu; 1047 1048 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1049 1050 critical_enter(); 1051 pcpu = rms_int_pcpu(rms); 1052 rms_int_influx_enter(rms, pcpu); 1053 atomic_interrupt_fence(); 1054 if (__predict_false(rms->writers > 0)) { 1055 rms_int_influx_exit(rms, pcpu); 1056 critical_exit(); 1057 return (0); 1058 } 1059 atomic_interrupt_fence(); 1060 rms_int_readers_inc(rms, pcpu); 1061 atomic_interrupt_fence(); 1062 rms_int_influx_exit(rms, pcpu); 1063 critical_exit(); 1064 return (1); 1065 } 1066 1067 static void __noinline 1068 rms_runlock_fallback(struct rmslock *rms) 1069 { 1070 1071 rms_int_influx_exit(rms, rms_int_pcpu(rms)); 1072 critical_exit(); 1073 1074 mtx_lock(&rms->mtx); 1075 MPASS(rms->writers > 0); 1076 MPASS(rms->readers > 0); 1077 MPASS(rms->debug_readers == rms->readers); 1078 rms_int_debug_readers_dec(rms); 1079 rms->readers--; 1080 if (rms->readers == 0) 1081 wakeup_one(&rms->writers); 1082 mtx_unlock(&rms->mtx); 1083 } 1084 1085 void 1086 rms_runlock(struct rmslock *rms) 1087 { 1088 struct rmslock_pcpu *pcpu; 1089 1090 critical_enter(); 1091 pcpu = rms_int_pcpu(rms); 1092 rms_int_influx_enter(rms, pcpu); 1093 atomic_interrupt_fence(); 1094 if (__predict_false(rms->writers > 0)) { 1095 rms_runlock_fallback(rms); 1096 return; 1097 } 1098 atomic_interrupt_fence(); 1099 rms_int_readers_dec(rms, pcpu); 1100 atomic_interrupt_fence(); 1101 rms_int_influx_exit(rms, pcpu); 1102 critical_exit(); 1103 } 1104 1105 struct rmslock_ipi { 1106 struct rmslock *rms; 1107 struct smp_rendezvous_cpus_retry_arg srcra; 1108 }; 1109 1110 static void 1111 rms_action_func(void *arg) 1112 { 1113 struct rmslock_ipi *rmsipi; 1114 struct rmslock_pcpu *pcpu; 1115 struct rmslock *rms; 1116 1117 rmsipi = __containerof(arg, struct rmslock_ipi, srcra); 1118 rms = rmsipi->rms; 1119 pcpu = rms_int_pcpu(rms); 1120 1121 if (pcpu->influx) 1122 return; 1123 if (pcpu->readers != 0) { 1124 atomic_add_int(&rms->readers, pcpu->readers); 1125 pcpu->readers = 0; 1126 } 1127 smp_rendezvous_cpus_done(arg); 1128 } 1129 1130 static void 1131 rms_wait_func(void *arg, int cpu) 1132 { 1133 struct rmslock_ipi *rmsipi; 1134 struct rmslock_pcpu *pcpu; 1135 struct rmslock *rms; 1136 1137 rmsipi = __containerof(arg, struct rmslock_ipi, srcra); 1138 rms = rmsipi->rms; 1139 pcpu = rms_int_remote_pcpu(rms, cpu); 1140 1141 while (atomic_load_int(&pcpu->influx)) 1142 cpu_spinwait(); 1143 } 1144 1145 #ifdef INVARIANTS 1146 static void 1147 rms_assert_no_pcpu_readers(struct rmslock *rms) 1148 { 1149 struct rmslock_pcpu *pcpu; 1150 int cpu; 1151 1152 CPU_FOREACH(cpu) { 1153 pcpu = rms_int_remote_pcpu(rms, cpu); 1154 if (pcpu->readers != 0) { 1155 panic("%s: got %d readers on cpu %d\n", __func__, 1156 pcpu->readers, cpu); 1157 } 1158 } 1159 } 1160 #else 1161 static void 1162 rms_assert_no_pcpu_readers(struct rmslock *rms) 1163 { 1164 } 1165 #endif 1166 1167 static void 1168 rms_wlock_switch(struct rmslock *rms) 1169 { 1170 struct rmslock_ipi rmsipi; 1171 1172 MPASS(rms->readers == 0); 1173 MPASS(rms->writers == 1); 1174 1175 rmsipi.rms = rms; 1176 1177 smp_rendezvous_cpus_retry(all_cpus, 1178 smp_no_rendezvous_barrier, 1179 rms_action_func, 1180 smp_no_rendezvous_barrier, 1181 rms_wait_func, 1182 &rmsipi.srcra); 1183 } 1184 1185 void 1186 rms_wlock(struct rmslock *rms) 1187 { 1188 1189 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 1190 MPASS(atomic_load_ptr(&rms->owner) != curthread); 1191 1192 mtx_lock(&rms->mtx); 1193 rms->writers++; 1194 if (rms->writers > 1) { 1195 msleep(&rms->owner, &rms->mtx, (PUSER - 1), 1196 mtx_name(&rms->mtx), 0); 1197 MPASS(rms->readers == 0); 1198 KASSERT(rms->owner == RMS_TRANSIENT, 1199 ("%s: unexpected owner value %p\n", __func__, 1200 rms->owner)); 1201 goto out_grab; 1202 } 1203 1204 KASSERT(rms->owner == RMS_NOOWNER, 1205 ("%s: unexpected owner value %p\n", __func__, rms->owner)); 1206 1207 rms_wlock_switch(rms); 1208 rms_assert_no_pcpu_readers(rms); 1209 1210 if (rms->readers > 0) { 1211 msleep(&rms->writers, &rms->mtx, (PUSER - 1), 1212 mtx_name(&rms->mtx), 0); 1213 } 1214 1215 out_grab: 1216 rms->owner = curthread; 1217 rms_assert_no_pcpu_readers(rms); 1218 mtx_unlock(&rms->mtx); 1219 MPASS(rms->readers == 0); 1220 } 1221 1222 void 1223 rms_wunlock(struct rmslock *rms) 1224 { 1225 1226 mtx_lock(&rms->mtx); 1227 KASSERT(rms->owner == curthread, 1228 ("%s: unexpected owner value %p\n", __func__, rms->owner)); 1229 MPASS(rms->writers >= 1); 1230 MPASS(rms->readers == 0); 1231 rms->writers--; 1232 if (rms->writers > 0) { 1233 wakeup_one(&rms->owner); 1234 rms->owner = RMS_TRANSIENT; 1235 } else { 1236 wakeup(&rms->readers); 1237 rms->owner = RMS_NOOWNER; 1238 } 1239 mtx_unlock(&rms->mtx); 1240 } 1241 1242 void 1243 rms_unlock(struct rmslock *rms) 1244 { 1245 1246 if (rms_wowned(rms)) 1247 rms_wunlock(rms); 1248 else 1249 rms_runlock(rms); 1250 } 1251