/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>
#include <vm/uma.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * A cookie to mark destroyed rmlocks. This is stored in the head of
 * rm_activeReaders.
 */
#define	RM_DESTROYED	((void *)0xdead)

#define	rm_destroyed(rm)	\
	(LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED)

#define	RMPF_ONQUEUE	1
#define	RMPF_SIGNAL	2

#ifndef INVARIANTS
#define	_rm_assert(c, what, file, line)
#endif

static void	assert_rm(const struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_rm(const struct lock_object *lock);
#endif
static void	lock_rm(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rm(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rm(struct lock_object *lock);

struct lock_class lock_class_rm = {
	.lc_name = "rm",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
	.lc_assert = assert_rm,
#ifdef DDB
	.lc_ddb_show = db_show_rm,
#endif
	.lc_lock = lock_rm,
	.lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rm,
#endif
};

struct lock_class lock_class_rm_sleepable = {
	.lc_name = "sleepable rm",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE,
	.lc_assert = assert_rm,
#ifdef DDB
	.lc_ddb_show = db_show_rm,
#endif
	.lc_lock = lock_rm,
	.lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rm,
#endif
};

static void
assert_rm(const struct lock_object *lock, int what)
{

	rm_assert((const struct rmlock *)lock, what);
}

static void
lock_rm(struct lock_object *lock, uintptr_t how)
{
	struct rmlock *rm;
	struct rm_priotracker *tracker;

	rm = (struct rmlock *)lock;
	if (how == 0)
		rm_wlock(rm);
	else {
		tracker = (struct rm_priotracker *)how;
		rm_rlock(rm, tracker);
	}
}

static uintptr_t
unlock_rm(struct lock_object *lock)
{
	struct thread *td;
	struct pcpu *pc;
	struct rmlock *rm;
	struct rm_queue *queue;
	struct rm_priotracker *tracker;
	uintptr_t how;

	rm = (struct rmlock *)lock;
	tracker = NULL;
	how = 0;
	rm_assert(rm, RA_LOCKED | RA_NOTRECURSED);
	if (rm_wowned(rm))
		rm_wunlock(rm);
	else {
		/*
		 * Find the right rm_priotracker structure for curthread.
		 * The guarantee about its uniqueness is given by the fact
		 * we already asserted the lock wasn't recursively acquired.
		 */
		critical_enter();
		td = curthread;
		pc = get_pcpu();
		for (queue = pc->pc_rm_queue.rmq_next;
		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
			tracker = (struct rm_priotracker *)queue;
			if ((tracker->rmp_rmlock == rm) &&
			    (tracker->rmp_thread == td)) {
				how = (uintptr_t)tracker;
				break;
			}
		}
		KASSERT(tracker != NULL,
		    ("rm_priotracker is non-NULL when lock held in read mode"));
		critical_exit();
		rm_runlock(rm, tracker);
	}
	return (how);
}

#ifdef KDTRACE_HOOKS
static int
owner_rm(const struct lock_object *lock, struct thread **owner)
{
	const struct rmlock *rm;
	struct lock_class *lc;

	rm = (const struct rmlock *)lock;
	lc = LOCK_CLASS(&rm->rm_wlock_object);
	return (lc->lc_owner(&rm->rm_wlock_object, owner));
}
#endif

static struct mtx rm_spinlock;

MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN);

/*
 * Add or remove tracker from per-cpu list.
 *
 * The per-cpu list can be traversed at any time in forward direction from an
 * interrupt on the *local* cpu.
 */
static void inline
rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker)
{
	struct rm_queue *next;

	/* Initialize all tracker pointers */
	tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue;
	next = pc->pc_rm_queue.rmq_next;
	tracker->rmp_cpuQueue.rmq_next = next;

	/* rmq_prev is not used during forward traversal. */
	next->rmq_prev = &tracker->rmp_cpuQueue;

	/* Update pointer to first element. */
	pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
}

/*
 * Return a count of the number of trackers the thread 'td' already
 * has on this CPU for the lock 'rm'.
 */
static int
rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm,
    const struct thread *td)
{
	struct rm_queue *queue;
	struct rm_priotracker *tracker;
	int count;

	count = 0;
	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
	    queue = queue->rmq_next) {
		tracker = (struct rm_priotracker *)queue;
		if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td))
			count++;
	}
	return (count);
}

static void inline
rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
{
	struct rm_queue *next, *prev;

	next = tracker->rmp_cpuQueue.rmq_next;
	prev = tracker->rmp_cpuQueue.rmq_prev;

	/* Not used during forward traversal. */
	next->rmq_prev = prev;

	/* Remove from list. */
	prev->rmq_next = next;
}

static void
rm_cleanIPI(void *arg)
{
	struct pcpu *pc;
	struct rmlock *rm = arg;
	struct rm_priotracker *tracker;
	struct rm_queue *queue;
	pc = get_pcpu();

	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
	    queue = queue->rmq_next) {
		tracker = (struct rm_priotracker *)queue;
		if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) {
			tracker->rmp_flags = RMPF_ONQUEUE;
			mtx_lock_spin(&rm_spinlock);
			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
			    rmp_qentry);
			mtx_unlock_spin(&rm_spinlock);
		}
	}
}

void
rm_init_flags(struct rmlock *rm, const char *name, int opts)
{
	struct lock_class *lc;
	int liflags, xflags;

	liflags = 0;
	if (!(opts & RM_NOWITNESS))
		liflags |= LO_WITNESS;
	if (opts & RM_RECURSE)
		liflags |= LO_RECURSABLE;
	if (opts & RM_NEW)
		liflags |= LO_NEW;
	if (opts & RM_DUPOK)
		liflags |= LO_DUPOK;
	rm->rm_writecpus = all_cpus;
	LIST_INIT(&rm->rm_activeReaders);
	if (opts & RM_SLEEPABLE) {
		liflags |= LO_SLEEPABLE;
		lc = &lock_class_rm_sleepable;
		xflags = (opts & RM_NEW ? SX_NEW : 0);
		sx_init_flags(&rm->rm_lock_sx, "rmlock_sx",
		    xflags | SX_NOWITNESS);
	} else {
		lc = &lock_class_rm;
		xflags = (opts & RM_NEW ? MTX_NEW : 0);
		mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx",
		    xflags | MTX_NOWITNESS);
	}
	lock_init(&rm->lock_object, lc, name, NULL, liflags);
}

void
rm_init(struct rmlock *rm, const char *name)
{

	rm_init_flags(rm, name, 0);
}

void
rm_destroy(struct rmlock *rm)
{

	rm_assert(rm, RA_UNLOCKED);
	LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED;
	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_destroy(&rm->rm_lock_sx);
	else
		mtx_destroy(&rm->rm_lock_mtx);
	lock_destroy(&rm->lock_object);
}

int
rm_wowned(const struct rmlock *rm)
{

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		return (sx_xlocked(&rm->rm_lock_sx));
	else
		return (mtx_owned(&rm->rm_lock_mtx));
}

void
rm_sysinit(void *arg)
{
	struct rm_args *args;

	args = arg;
	rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags);
}
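
/*
 * Illustrative only, not part of the implementation: a minimal sketch of how
 * a consumer typically drives the rmlock KPI implemented in this file.  The
 * names example_lock, example_data, example_read and example_write are
 * placeholders.  Each read acquisition supplies its own rm_priotracker,
 * normally kept on the caller's stack.
 *
 *	static struct rmlock example_lock;
 *	static int example_data;
 *
 *	static void
 *	example_setup(void)
 *	{
 *		// RM_RECURSE permits a thread to read-recurse on the lock.
 *		rm_init_flags(&example_lock, "example", RM_RECURSE);
 *	}
 *
 *	static int
 *	example_read(void)
 *	{
 *		struct rm_priotracker tracker;
 *		int v;
 *
 *		rm_rlock(&example_lock, &tracker);
 *		v = example_data;	// read section; no sleeping unless
 *					// the lock was created RM_SLEEPABLE
 *		rm_runlock(&example_lock, &tracker);
 *		return (v);
 *	}
 *
 *	static void
 *	example_write(int v)
 *	{
 *		rm_wlock(&example_lock);	// blocks until readers drain
 *		example_data = v;
 *		rm_wunlock(&example_lock);
 *	}
 */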

static __noinline int
_rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
	struct pcpu *pc;

	critical_enter();
	pc = get_pcpu();

	/* Check if we just need to do a proper critical_exit. */
	if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) {
		critical_exit();
		return (1);
	}

	/* Remove our tracker from the per-cpu list. */
	rm_tracker_remove(pc, tracker);

	/*
	 * Check to see if the IPI granted us the lock after all. The load of
	 * rmp_flags must happen after the tracker is removed from the list.
	 */
	atomic_interrupt_fence();
	if (tracker->rmp_flags) {
		/* Just add back tracker - we hold the lock. */
		rm_tracker_add(pc, tracker);
		critical_exit();
		return (1);
	}

	/*
	 * We allow readers to acquire a lock even if a writer is blocked if
	 * the lock is recursive and the reader already holds the lock.
	 */
	if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		/*
		 * Just grant the lock if this thread already has a tracker
		 * for this lock on the per-cpu queue.
		 */
		if (rm_trackers_present(pc, rm, curthread) != 0) {
			mtx_lock_spin(&rm_spinlock);
			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
			    rmp_qentry);
			tracker->rmp_flags = RMPF_ONQUEUE;
			mtx_unlock_spin(&rm_spinlock);
			rm_tracker_add(pc, tracker);
			critical_exit();
			return (1);
		}
	}

	sched_unpin();
	critical_exit();

	if (trylock) {
		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
			if (!sx_try_xlock(&rm->rm_lock_sx))
				return (0);
		} else {
			if (!mtx_trylock(&rm->rm_lock_mtx))
				return (0);
		}
	} else {
		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
			THREAD_SLEEPING_OK();
			sx_xlock(&rm->rm_lock_sx);
			THREAD_NO_SLEEPING();
		} else
			mtx_lock(&rm->rm_lock_mtx);
	}

	critical_enter();
	pc = get_pcpu();
	CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus);
	rm_tracker_add(pc, tracker);
	sched_pin();
	critical_exit();

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xunlock(&rm->rm_lock_sx);
	else
		mtx_unlock(&rm->rm_lock_mtx);

	return (1);
}

int
_rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
	struct thread *td = curthread;
	struct pcpu *pc;

	if (SCHEDULER_STOPPED())
		return (1);

	tracker->rmp_flags = 0;
	tracker->rmp_thread = td;
	tracker->rmp_rmlock = rm;

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		THREAD_NO_SLEEPING();

	td->td_critnest++;	/* critical_enter(); */
	atomic_interrupt_fence();

	pc = cpuid_to_pcpu[td->td_oncpu];
	rm_tracker_add(pc, tracker);
	sched_pin();

	atomic_interrupt_fence();
	td->td_critnest--;

	/*
	 * Fast path to combine two common conditions into a single
	 * conditional jump.
	 */
	if (__predict_true(0 == (td->td_owepreempt |
	    CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus))))
		return (1);

	/* We do not have a read token and need to acquire one. */
	return _rm_rlock_hard(rm, tracker, trylock);
}

static __noinline void
_rm_unlock_hard(struct thread *td, struct rm_priotracker *tracker)
{

	if (td->td_owepreempt) {
		td->td_critnest++;
		critical_exit();
	}

	if (!tracker->rmp_flags)
		return;

	mtx_lock_spin(&rm_spinlock);
	LIST_REMOVE(tracker, rmp_qentry);

	if (tracker->rmp_flags & RMPF_SIGNAL) {
		struct rmlock *rm;
		struct turnstile *ts;

		rm = tracker->rmp_rmlock;

		turnstile_chain_lock(&rm->lock_object);
		mtx_unlock_spin(&rm_spinlock);

		ts = turnstile_lookup(&rm->lock_object);

		turnstile_signal(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts);
		turnstile_chain_unlock(&rm->lock_object);
	} else
		mtx_unlock_spin(&rm_spinlock);
}

void
_rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker)
{
	struct pcpu *pc;
	struct thread *td = tracker->rmp_thread;

	if (SCHEDULER_STOPPED())
		return;

	td->td_critnest++;	/* critical_enter(); */
	atomic_interrupt_fence();

	pc = cpuid_to_pcpu[td->td_oncpu];
	rm_tracker_remove(pc, tracker);

	atomic_interrupt_fence();
	td->td_critnest--;
	sched_unpin();

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		THREAD_SLEEPING_OK();

	if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags)))
		return;

	_rm_unlock_hard(td, tracker);
}

void
_rm_wlock(struct rmlock *rm)
{
	struct rm_priotracker *prio;
	struct turnstile *ts;
	cpuset_t readcpus;

	if (SCHEDULER_STOPPED())
		return;

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xlock(&rm->rm_lock_sx);
	else
		mtx_lock(&rm->rm_lock_mtx);

	if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
		/* Get all read tokens back */
		readcpus = all_cpus;
		CPU_ANDNOT(&readcpus, &readcpus, &rm->rm_writecpus);
		rm->rm_writecpus = all_cpus;

		/*
		 * Assumes rm->rm_writecpus update is visible on other CPUs
		 * before rm_cleanIPI is called.
		 */
#ifdef SMP
		smp_rendezvous_cpus(readcpus,
		    smp_no_rendezvous_barrier,
		    rm_cleanIPI,
		    smp_no_rendezvous_barrier,
		    rm);

#else
		rm_cleanIPI(rm);
#endif

		mtx_lock_spin(&rm_spinlock);
		while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) {
			ts = turnstile_trywait(&rm->lock_object);
			prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL;
			mtx_unlock_spin(&rm_spinlock);
			turnstile_wait(ts, prio->rmp_thread,
			    TS_EXCLUSIVE_QUEUE);
			mtx_lock_spin(&rm_spinlock);
		}
		mtx_unlock_spin(&rm_spinlock);
	}
}

void
_rm_wunlock(struct rmlock *rm)
{

	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
		sx_xunlock(&rm->rm_lock_sx);
	else
		mtx_unlock(&rm->rm_lock_mtx);
}

#if LOCK_DEBUG > 0

void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
	    curthread, rm->lock_object.lo_name, file, line));
	KASSERT(!rm_destroyed(rm),
	    ("rm_wlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_UNLOCKED, file, line);

	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
	    file, line, NULL);

	_rm_wlock(rm);

	LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(!rm_destroyed(rm),
	    ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
	_rm_wunlock(rm);
	TD_LOCKS_DEC(curthread);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return (1);

#ifdef INVARIANTS
	if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) {
		critical_enter();
		KASSERT(rm_trackers_present(get_pcpu(), rm,
		    curthread) == 0,
		    ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n",
		    rm->lock_object.lo_name, file, line));
		critical_exit();
	}
#endif
	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
	    curthread, rm->lock_object.lo_name, file, line));
	KASSERT(!rm_destroyed(rm),
	    ("rm_rlock() of destroyed rmlock @ %s:%d", file, line));
	if (!trylock) {
		KASSERT(!rm_wowned(rm),
		    ("rm_rlock: wlock already held for %s @ %s:%d",
		    rm->lock_object.lo_name, file, line));
		WITNESS_CHECKORDER(&rm->lock_object,
		    LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL);
	}

	if (_rm_rlock(rm, tracker, trylock)) {
		if (trylock)
			LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file,
			    line);
		else
			LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file,
			    line);
		WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line);
		TD_LOCKS_INC(curthread);
		return (1);
	} else if (trylock)
		LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line);

	return (0);
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(!rm_destroyed(rm),
	    ("rm_runlock() of destroyed rmlock @ %s:%d", file, line));
	_rm_assert(rm, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
	_rm_runlock(rm, tracker);
	TD_LOCKS_DEC(curthread);
}

#else

/*
 * Just strip out file and line arguments if no lock debugging is enabled in
 * the kernel - we are called from a kernel module.
 */
void
_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{

	_rm_wlock(rm);
}

void
_rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
{

	_rm_wunlock(rm);
}

int
_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    int trylock, const char *file, int line)
{

	return _rm_rlock(rm, tracker, trylock);
}

void
_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
    const char *file, int line)
{

	_rm_runlock(rm, tracker);
}

#endif

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rm_assert
#endif

/*
 * Note that this does not need to use witness_assert() for read lock
 * assertions since an exact count of read locks held by this thread
 * is computable.
 */
void
_rm_assert(const struct rmlock *rm, int what, const char *file, int line)
{
	int count;

	if (SCHEDULER_STOPPED())
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
		/*
		 * Handle the write-locked case. Unlike other
		 * primitives, writers can never recurse.
		 */
		if (rm_wowned(rm)) {
			if (what & RA_RLOCKED)
				panic("Lock %s exclusively locked @ %s:%d\n",
				    rm->lock_object.lo_name, file, line);
			if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rm->lock_object.lo_name, file, line);
			break;
		}

		critical_enter();
		count = rm_trackers_present(get_pcpu(), rm, curthread);
		critical_exit();

		if (count == 0)
			panic("Lock %s not %slocked @ %s:%d\n",
			    rm->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);
		if (count > 1) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rm->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rm->lock_object.lo_name, file, line);
		break;
	case RA_WLOCKED:
		if (!rm_wowned(rm))
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rm->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
		if (rm_wowned(rm))
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rm->lock_object.lo_name, file, line);

		critical_enter();
		count = rm_trackers_present(get_pcpu(), rm, curthread);
		critical_exit();

		if (count != 0)
			panic("Lock %s read locked @ %s:%d\n",
			    rm->lock_object.lo_name, file, line);
		break;
	default:
		panic("Unknown rm lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
static void
print_tracker(struct rm_priotracker *tr)
{
	struct thread *td;

	td = tr->rmp_thread;
	db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid,
	    td->td_proc->p_pid, td->td_name);
	if (tr->rmp_flags & RMPF_ONQUEUE) {
		db_printf("ONQUEUE");
		if (tr->rmp_flags & RMPF_SIGNAL)
			db_printf(",SIGNAL");
	} else
		db_printf("0");
	db_printf("}\n");
}

static void
db_show_rm(const struct lock_object *lock)
{
	struct rm_priotracker *tr;
	struct rm_queue *queue;
	const struct rmlock *rm;
	struct lock_class *lc;
	struct pcpu *pc;

	rm = (const struct rmlock *)lock;
	db_printf(" writecpus: ");
	ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus));
	db_printf("\n");
	db_printf(" per-CPU readers:\n");
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
		for (queue = pc->pc_rm_queue.rmq_next;
		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
			tr = (struct rm_priotracker *)queue;
			if (tr->rmp_rmlock == rm)
				print_tracker(tr);
		}
	db_printf(" active readers:\n");
	LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry)
		print_tracker(tr);
	lc = LOCK_CLASS(&rm->rm_wlock_object);
	db_printf("Backing write-lock (%s):\n", lc->lc_name);
	lc->lc_ddb_show(&rm->rm_wlock_object);
}
#endif

/*
 * Read-mostly sleepable locks.
 *
 * These primitives allow both readers and writers to sleep. However, neither
 * readers nor writers are tracked and subsequently there is no priority
 * propagation.
 *
 * They are intended to be only used when write-locking is almost never needed
 * (e.g., they can guard against unloading a kernel module) while read-locking
 * happens all the time.
 *
 * Concurrent writers take turns taking the lock while going off cpu. If this
 * is of concern for your use case, this is not the right primitive.
 *
 * Neither rms_rlock nor rms_runlock use thread fences. Instead interrupt
 * fences are inserted to ensure ordering with the code executed in the IPI
 * handler.
 *
 * No attempt is made to track which CPUs read locked at least once,
 * consequently write locking sends IPIs to all of them. This will become a
 * problem at some point. The easiest way to lessen it is to provide a bitmap.
 */
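
/*
 * A minimal usage sketch for the rms KPI defined below; illustrative only.
 * The names example_rms_lock, example_init, example_reader and
 * example_rare_writer are placeholders.
 *
 *	static struct rmslock example_rms_lock;
 *
 *	static void
 *	example_init(void)
 *	{
 *		rms_init(&example_rms_lock, "example rms");
 *	}
 *
 *	static void
 *	example_reader(void)
 *	{
 *		rms_rlock(&example_rms_lock);
 *		// read section; sleeping is allowed here
 *		rms_runlock(&example_rms_lock);
 *	}
 *
 *	static void
 *	example_rare_writer(void)
 *	{
 *		rms_wlock(&example_rms_lock);
 *		// exclusive section; new readers block until rms_wunlock()
 *		rms_wunlock(&example_rms_lock);
 *	}
 */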

#define	RMS_NOOWNER	((void *)0x1)
#define	RMS_TRANSIENT	((void *)0x2)
#define	RMS_FLAGMASK	0xf

struct rmslock_pcpu {
	int influx;
	int readers;
};

_Static_assert(sizeof(struct rmslock_pcpu) == 8, "bad size");

/*
 * Internal routines
 */
static struct rmslock_pcpu *
rms_int_pcpu(struct rmslock *rms)
{

	CRITICAL_ASSERT(curthread);
	return (zpcpu_get(rms->pcpu));
}

static struct rmslock_pcpu *
rms_int_remote_pcpu(struct rmslock *rms, int cpu)
{

	return (zpcpu_get_cpu(rms->pcpu, cpu));
}

static void
rms_int_influx_enter(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 0);
	pcpu->influx = 1;
}

static void
rms_int_influx_exit(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 1);
	pcpu->influx = 0;
}

#ifdef INVARIANTS
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
	int old;
	old = atomic_fetchadd_int(&rms->debug_readers, 1);
	KASSERT(old >= 0, ("%s: bad readers count %d\n", __func__, old));
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
	int old;

	old = atomic_fetchadd_int(&rms->debug_readers, -1);
	KASSERT(old > 0, ("%s: bad readers count %d\n", __func__, old));
}
#else
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
}
#endif

static void
rms_int_readers_inc(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_inc(rms);
	pcpu->readers++;
}

static void
rms_int_readers_dec(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_dec(rms);
	pcpu->readers--;
}

/*
 * Public API
 */
void
rms_init(struct rmslock *rms, const char *name)
{

	rms->owner = RMS_NOOWNER;
	rms->writers = 0;
	rms->readers = 0;
	rms->debug_readers = 0;
	mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
	rms->pcpu = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK | M_ZERO);
}

void
rms_destroy(struct rmslock *rms)
{

	MPASS(rms->writers == 0);
	MPASS(rms->readers == 0);
	mtx_destroy(&rms->mtx);
	uma_zfree_pcpu(pcpu_zone_8, rms->pcpu);
}

static void __noinline
rms_rlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	while (rms->writers > 0)
		msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
	critical_enter();
	rms_int_readers_inc(rms, rms_int_pcpu(rms));
	mtx_unlock(&rms->mtx);
	critical_exit();
	TD_LOCKS_INC(curthread);
}

void
rms_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	rms_assert_rlock_ok(rms);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_rlock_fallback(rms);
		return;
	}
	atomic_interrupt_fence();
	rms_int_readers_inc(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_INC(curthread);
}

int
rms_try_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_int_influx_exit(rms, pcpu);
		critical_exit();
		return (0);
	}
	atomic_interrupt_fence();
	rms_int_readers_inc(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_INC(curthread);
	return (1);
}

static void __noinline
rms_runlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	MPASS(rms->writers > 0);
	MPASS(rms->readers > 0);
	MPASS(rms->debug_readers == rms->readers);
	rms_int_debug_readers_dec(rms);
	rms->readers--;
	if (rms->readers == 0)
		wakeup_one(&rms->writers);
	mtx_unlock(&rms->mtx);
	TD_LOCKS_DEC(curthread);
}

void
rms_runlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	atomic_interrupt_fence();
	if (__predict_false(rms->writers > 0)) {
		rms_runlock_fallback(rms);
		return;
	}
	atomic_interrupt_fence();
	rms_int_readers_dec(rms, pcpu);
	atomic_interrupt_fence();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	TD_LOCKS_DEC(curthread);
}

struct rmslock_ipi {
	struct rmslock *rms;
	struct smp_rendezvous_cpus_retry_arg srcra;
};

static void
rms_action_func(void *arg)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_pcpu(rms);

	if (pcpu->influx)
		return;
	if (pcpu->readers != 0) {
		atomic_add_int(&rms->readers, pcpu->readers);
		pcpu->readers = 0;
	}
	smp_rendezvous_cpus_done(arg);
}

static void
rms_wait_func(void *arg, int cpu)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_remote_pcpu(rms, cpu);

	while (atomic_load_int(&pcpu->influx))
		cpu_spinwait();
}

#ifdef INVARIANTS
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;
	int cpu;

	CPU_FOREACH(cpu) {
		pcpu = rms_int_remote_pcpu(rms, cpu);
		if (pcpu->readers != 0) {
			panic("%s: got %d readers on cpu %d\n", __func__,
			    pcpu->readers, cpu);
		}
	}
}
#else
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
}
#endif

static void
rms_wlock_switch(struct rmslock *rms)
{
	struct rmslock_ipi rmsipi;

	MPASS(rms->readers == 0);
	MPASS(rms->writers == 1);

	rmsipi.rms = rms;

	smp_rendezvous_cpus_retry(all_cpus,
	    smp_no_rendezvous_barrier,
	    rms_action_func,
	    smp_no_rendezvous_barrier,
	    rms_wait_func,
	    &rmsipi.srcra);
}

void
rms_wlock(struct rmslock *rms)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	mtx_lock(&rms->mtx);
	rms->writers++;
	if (rms->writers > 1) {
		msleep(&rms->owner, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
		MPASS(rms->readers == 0);
		KASSERT(rms->owner == RMS_TRANSIENT,
		    ("%s: unexpected owner value %p\n", __func__,
		    rms->owner));
		goto out_grab;
	}

	KASSERT(rms->owner == RMS_NOOWNER,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));

	rms_wlock_switch(rms);
	rms_assert_no_pcpu_readers(rms);

	if (rms->readers > 0) {
		msleep(&rms->writers, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
	}

out_grab:
	rms->owner = curthread;
	rms_assert_no_pcpu_readers(rms);
	mtx_unlock(&rms->mtx);
	MPASS(rms->readers == 0);
	TD_LOCKS_INC(curthread);
}

void
rms_wunlock(struct rmslock *rms)
{

	mtx_lock(&rms->mtx);
	KASSERT(rms->owner == curthread,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));
	MPASS(rms->writers >= 1);
	MPASS(rms->readers == 0);
	rms->writers--;
	if (rms->writers > 0) {
		wakeup_one(&rms->owner);
		rms->owner = RMS_TRANSIENT;
	} else {
		wakeup(&rms->readers);
		rms->owner = RMS_NOOWNER;
	}
	mtx_unlock(&rms->mtx);
	TD_LOCKS_DEC(curthread);
}

void
rms_unlock(struct rmslock *rms)
{

	if (rms_wowned(rms))
		rms_wunlock(rms);
	else
		rms_runlock(rms);
}