1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Machine independent bits of reader/writer lock implementation. 
34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ddb.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 44 #include <sys/kernel.h> 45 #include <sys/kdb.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/proc.h> 50 #include <sys/rmlock.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/turnstile.h> 54 #include <sys/lock_profile.h> 55 #include <machine/cpu.h> 56 #include <vm/uma.h> 57 58 #ifdef DDB 59 #include <ddb/ddb.h> 60 #endif 61 62 /* 63 * A cookie to mark destroyed rmlocks. This is stored in the head of 64 * rm_activeReaders. 65 */ 66 #define RM_DESTROYED ((void *)0xdead) 67 68 #define rm_destroyed(rm) \ 69 (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED) 70 71 #define RMPF_ONQUEUE 1 72 #define RMPF_SIGNAL 2 73 74 #ifndef INVARIANTS 75 #define _rm_assert(c, what, file, line) 76 #endif 77 78 static void assert_rm(const struct lock_object *lock, int what); 79 #ifdef DDB 80 static void db_show_rm(const struct lock_object *lock); 81 #endif 82 static void lock_rm(struct lock_object *lock, uintptr_t how); 83 #ifdef KDTRACE_HOOKS 84 static int owner_rm(const struct lock_object *lock, struct thread **owner); 85 #endif 86 static uintptr_t unlock_rm(struct lock_object *lock); 87 88 struct lock_class lock_class_rm = { 89 .lc_name = "rm", 90 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, 91 .lc_assert = assert_rm, 92 #ifdef DDB 93 .lc_ddb_show = db_show_rm, 94 #endif 95 .lc_lock = lock_rm, 96 .lc_unlock = unlock_rm, 97 #ifdef KDTRACE_HOOKS 98 .lc_owner = owner_rm, 99 #endif 100 }; 101 102 struct lock_class lock_class_rm_sleepable = { 103 .lc_name = "sleepable rm", 104 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE, 105 .lc_assert = assert_rm, 106 #ifdef DDB 107 .lc_ddb_show = db_show_rm, 108 #endif 109 .lc_lock = lock_rm, 110 .lc_unlock = unlock_rm, 111 #ifdef KDTRACE_HOOKS 112 .lc_owner = owner_rm, 113 #endif 114 }; 115 116 static void 117 assert_rm(const 
struct lock_object *lock, int what) 118 { 119 120 rm_assert((const struct rmlock *)lock, what); 121 } 122 123 static void 124 lock_rm(struct lock_object *lock, uintptr_t how) 125 { 126 struct rmlock *rm; 127 struct rm_priotracker *tracker; 128 129 rm = (struct rmlock *)lock; 130 if (how == 0) 131 rm_wlock(rm); 132 else { 133 tracker = (struct rm_priotracker *)how; 134 rm_rlock(rm, tracker); 135 } 136 } 137 138 static uintptr_t 139 unlock_rm(struct lock_object *lock) 140 { 141 struct thread *td; 142 struct pcpu *pc; 143 struct rmlock *rm; 144 struct rm_queue *queue; 145 struct rm_priotracker *tracker; 146 uintptr_t how; 147 148 rm = (struct rmlock *)lock; 149 tracker = NULL; 150 how = 0; 151 rm_assert(rm, RA_LOCKED | RA_NOTRECURSED); 152 if (rm_wowned(rm)) 153 rm_wunlock(rm); 154 else { 155 /* 156 * Find the right rm_priotracker structure for curthread. 157 * The guarantee about its uniqueness is given by the fact 158 * we already asserted the lock wasn't recursively acquired. 159 */ 160 critical_enter(); 161 td = curthread; 162 pc = get_pcpu(); 163 for (queue = pc->pc_rm_queue.rmq_next; 164 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 165 tracker = (struct rm_priotracker *)queue; 166 if ((tracker->rmp_rmlock == rm) && 167 (tracker->rmp_thread == td)) { 168 how = (uintptr_t)tracker; 169 break; 170 } 171 } 172 KASSERT(tracker != NULL, 173 ("rm_priotracker is non-NULL when lock held in read mode")); 174 critical_exit(); 175 rm_runlock(rm, tracker); 176 } 177 return (how); 178 } 179 180 #ifdef KDTRACE_HOOKS 181 static int 182 owner_rm(const struct lock_object *lock, struct thread **owner) 183 { 184 const struct rmlock *rm; 185 struct lock_class *lc; 186 187 rm = (const struct rmlock *)lock; 188 lc = LOCK_CLASS(&rm->rm_wlock_object); 189 return (lc->lc_owner(&rm->rm_wlock_object, owner)); 190 } 191 #endif 192 193 static struct mtx rm_spinlock; 194 195 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); 196 197 /* 198 * Add or remove tracker from 
per-cpu list. 199 * 200 * The per-cpu list can be traversed at any time in forward direction from an 201 * interrupt on the *local* cpu. 202 */ 203 static void inline 204 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker) 205 { 206 struct rm_queue *next; 207 208 /* Initialize all tracker pointers */ 209 tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; 210 next = pc->pc_rm_queue.rmq_next; 211 tracker->rmp_cpuQueue.rmq_next = next; 212 213 /* rmq_prev is not used during froward traversal. */ 214 next->rmq_prev = &tracker->rmp_cpuQueue; 215 216 /* Update pointer to first element. */ 217 pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; 218 } 219 220 /* 221 * Return a count of the number of trackers the thread 'td' already 222 * has on this CPU for the lock 'rm'. 223 */ 224 static int 225 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm, 226 const struct thread *td) 227 { 228 struct rm_queue *queue; 229 struct rm_priotracker *tracker; 230 int count; 231 232 count = 0; 233 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 234 queue = queue->rmq_next) { 235 tracker = (struct rm_priotracker *)queue; 236 if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td)) 237 count++; 238 } 239 return (count); 240 } 241 242 static void inline 243 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker) 244 { 245 struct rm_queue *next, *prev; 246 247 next = tracker->rmp_cpuQueue.rmq_next; 248 prev = tracker->rmp_cpuQueue.rmq_prev; 249 250 /* Not used during forward traversal. */ 251 next->rmq_prev = prev; 252 253 /* Remove from list. 
*/ 254 prev->rmq_next = next; 255 } 256 257 static void 258 rm_cleanIPI(void *arg) 259 { 260 struct pcpu *pc; 261 struct rmlock *rm = arg; 262 struct rm_priotracker *tracker; 263 struct rm_queue *queue; 264 pc = get_pcpu(); 265 266 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 267 queue = queue->rmq_next) { 268 tracker = (struct rm_priotracker *)queue; 269 if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) { 270 tracker->rmp_flags = RMPF_ONQUEUE; 271 mtx_lock_spin(&rm_spinlock); 272 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 273 rmp_qentry); 274 mtx_unlock_spin(&rm_spinlock); 275 } 276 } 277 } 278 279 void 280 rm_init_flags(struct rmlock *rm, const char *name, int opts) 281 { 282 struct lock_class *lc; 283 int liflags, xflags; 284 285 liflags = 0; 286 if (!(opts & RM_NOWITNESS)) 287 liflags |= LO_WITNESS; 288 if (opts & RM_RECURSE) 289 liflags |= LO_RECURSABLE; 290 if (opts & RM_NEW) 291 liflags |= LO_NEW; 292 rm->rm_writecpus = all_cpus; 293 LIST_INIT(&rm->rm_activeReaders); 294 if (opts & RM_SLEEPABLE) { 295 liflags |= LO_SLEEPABLE; 296 lc = &lock_class_rm_sleepable; 297 xflags = (opts & RM_NEW ? SX_NEW : 0); 298 sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", 299 xflags | SX_NOWITNESS); 300 } else { 301 lc = &lock_class_rm; 302 xflags = (opts & RM_NEW ? 
MTX_NEW : 0); 303 mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", 304 xflags | MTX_NOWITNESS); 305 } 306 lock_init(&rm->lock_object, lc, name, NULL, liflags); 307 } 308 309 void 310 rm_init(struct rmlock *rm, const char *name) 311 { 312 313 rm_init_flags(rm, name, 0); 314 } 315 316 void 317 rm_destroy(struct rmlock *rm) 318 { 319 320 rm_assert(rm, RA_UNLOCKED); 321 LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED; 322 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 323 sx_destroy(&rm->rm_lock_sx); 324 else 325 mtx_destroy(&rm->rm_lock_mtx); 326 lock_destroy(&rm->lock_object); 327 } 328 329 int 330 rm_wowned(const struct rmlock *rm) 331 { 332 333 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 334 return (sx_xlocked(&rm->rm_lock_sx)); 335 else 336 return (mtx_owned(&rm->rm_lock_mtx)); 337 } 338 339 void 340 rm_sysinit(void *arg) 341 { 342 struct rm_args *args; 343 344 args = arg; 345 rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); 346 } 347 348 static __noinline int 349 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 350 { 351 struct pcpu *pc; 352 353 critical_enter(); 354 pc = get_pcpu(); 355 356 /* Check if we just need to do a proper critical_exit. */ 357 if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) { 358 critical_exit(); 359 return (1); 360 } 361 362 /* Remove our tracker from the per-cpu list. */ 363 rm_tracker_remove(pc, tracker); 364 365 /* Check to see if the IPI granted us the lock after all. */ 366 if (tracker->rmp_flags) { 367 /* Just add back tracker - we hold the lock. */ 368 rm_tracker_add(pc, tracker); 369 critical_exit(); 370 return (1); 371 } 372 373 /* 374 * We allow readers to acquire a lock even if a writer is blocked if 375 * the lock is recursive and the reader already holds the lock. 376 */ 377 if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { 378 /* 379 * Just grant the lock if this thread already has a tracker 380 * for this lock on the per-cpu queue. 
381 */ 382 if (rm_trackers_present(pc, rm, curthread) != 0) { 383 mtx_lock_spin(&rm_spinlock); 384 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 385 rmp_qentry); 386 tracker->rmp_flags = RMPF_ONQUEUE; 387 mtx_unlock_spin(&rm_spinlock); 388 rm_tracker_add(pc, tracker); 389 critical_exit(); 390 return (1); 391 } 392 } 393 394 sched_unpin(); 395 critical_exit(); 396 397 if (trylock) { 398 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 399 if (!sx_try_xlock(&rm->rm_lock_sx)) 400 return (0); 401 } else { 402 if (!mtx_trylock(&rm->rm_lock_mtx)) 403 return (0); 404 } 405 } else { 406 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 407 THREAD_SLEEPING_OK(); 408 sx_xlock(&rm->rm_lock_sx); 409 THREAD_NO_SLEEPING(); 410 } else 411 mtx_lock(&rm->rm_lock_mtx); 412 } 413 414 critical_enter(); 415 pc = get_pcpu(); 416 CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus); 417 rm_tracker_add(pc, tracker); 418 sched_pin(); 419 critical_exit(); 420 421 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 422 sx_xunlock(&rm->rm_lock_sx); 423 else 424 mtx_unlock(&rm->rm_lock_mtx); 425 426 return (1); 427 } 428 429 int 430 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 431 { 432 struct thread *td = curthread; 433 struct pcpu *pc; 434 435 if (SCHEDULER_STOPPED()) 436 return (1); 437 438 tracker->rmp_flags = 0; 439 tracker->rmp_thread = td; 440 tracker->rmp_rmlock = rm; 441 442 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 443 THREAD_NO_SLEEPING(); 444 445 td->td_critnest++; /* critical_enter(); */ 446 447 __compiler_membar(); 448 449 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 450 451 rm_tracker_add(pc, tracker); 452 453 sched_pin(); 454 455 __compiler_membar(); 456 457 td->td_critnest--; 458 459 /* 460 * Fast path to combine two common conditions into a single 461 * conditional jump. 
462 */ 463 if (__predict_true(0 == (td->td_owepreempt | 464 CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))) 465 return (1); 466 467 /* We do not have a read token and need to acquire one. */ 468 return _rm_rlock_hard(rm, tracker, trylock); 469 } 470 471 static __noinline void 472 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker) 473 { 474 475 if (td->td_owepreempt) { 476 td->td_critnest++; 477 critical_exit(); 478 } 479 480 if (!tracker->rmp_flags) 481 return; 482 483 mtx_lock_spin(&rm_spinlock); 484 LIST_REMOVE(tracker, rmp_qentry); 485 486 if (tracker->rmp_flags & RMPF_SIGNAL) { 487 struct rmlock *rm; 488 struct turnstile *ts; 489 490 rm = tracker->rmp_rmlock; 491 492 turnstile_chain_lock(&rm->lock_object); 493 mtx_unlock_spin(&rm_spinlock); 494 495 ts = turnstile_lookup(&rm->lock_object); 496 497 turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); 498 turnstile_unpend(ts); 499 turnstile_chain_unlock(&rm->lock_object); 500 } else 501 mtx_unlock_spin(&rm_spinlock); 502 } 503 504 void 505 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) 506 { 507 struct pcpu *pc; 508 struct thread *td = tracker->rmp_thread; 509 510 if (SCHEDULER_STOPPED()) 511 return; 512 513 td->td_critnest++; /* critical_enter(); */ 514 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 515 rm_tracker_remove(pc, tracker); 516 td->td_critnest--; 517 sched_unpin(); 518 519 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 520 THREAD_SLEEPING_OK(); 521 522 if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags))) 523 return; 524 525 _rm_unlock_hard(td, tracker); 526 } 527 528 void 529 _rm_wlock(struct rmlock *rm) 530 { 531 struct rm_priotracker *prio; 532 struct turnstile *ts; 533 cpuset_t readcpus; 534 535 if (SCHEDULER_STOPPED()) 536 return; 537 538 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 539 sx_xlock(&rm->rm_lock_sx); 540 else 541 mtx_lock(&rm->rm_lock_mtx); 542 543 if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { 544 /* Get all read tokens back */ 545 
readcpus = all_cpus; 546 CPU_ANDNOT(&readcpus, &rm->rm_writecpus); 547 rm->rm_writecpus = all_cpus; 548 549 /* 550 * Assumes rm->rm_writecpus update is visible on other CPUs 551 * before rm_cleanIPI is called. 552 */ 553 #ifdef SMP 554 smp_rendezvous_cpus(readcpus, 555 smp_no_rendezvous_barrier, 556 rm_cleanIPI, 557 smp_no_rendezvous_barrier, 558 rm); 559 560 #else 561 rm_cleanIPI(rm); 562 #endif 563 564 mtx_lock_spin(&rm_spinlock); 565 while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { 566 ts = turnstile_trywait(&rm->lock_object); 567 prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; 568 mtx_unlock_spin(&rm_spinlock); 569 turnstile_wait(ts, prio->rmp_thread, 570 TS_EXCLUSIVE_QUEUE); 571 mtx_lock_spin(&rm_spinlock); 572 } 573 mtx_unlock_spin(&rm_spinlock); 574 } 575 } 576 577 void 578 _rm_wunlock(struct rmlock *rm) 579 { 580 581 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 582 sx_xunlock(&rm->rm_lock_sx); 583 else 584 mtx_unlock(&rm->rm_lock_mtx); 585 } 586 587 #if LOCK_DEBUG > 0 588 589 void 590 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 591 { 592 593 if (SCHEDULER_STOPPED()) 594 return; 595 596 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 597 ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d", 598 curthread, rm->lock_object.lo_name, file, line)); 599 KASSERT(!rm_destroyed(rm), 600 ("rm_wlock() of destroyed rmlock @ %s:%d", file, line)); 601 _rm_assert(rm, RA_UNLOCKED, file, line); 602 603 WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, 604 file, line, NULL); 605 606 _rm_wlock(rm); 607 608 LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); 609 WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 610 TD_LOCKS_INC(curthread); 611 } 612 613 void 614 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 615 { 616 617 if (SCHEDULER_STOPPED()) 618 return; 619 620 KASSERT(!rm_destroyed(rm), 621 ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line)); 622 _rm_assert(rm, RA_WLOCKED, 
file, line); 623 WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 624 LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); 625 _rm_wunlock(rm); 626 TD_LOCKS_DEC(curthread); 627 } 628 629 int 630 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 631 int trylock, const char *file, int line) 632 { 633 634 if (SCHEDULER_STOPPED()) 635 return (1); 636 637 #ifdef INVARIANTS 638 if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) { 639 critical_enter(); 640 KASSERT(rm_trackers_present(get_pcpu(), rm, 641 curthread) == 0, 642 ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n", 643 rm->lock_object.lo_name, file, line)); 644 critical_exit(); 645 } 646 #endif 647 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 648 ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d", 649 curthread, rm->lock_object.lo_name, file, line)); 650 KASSERT(!rm_destroyed(rm), 651 ("rm_rlock() of destroyed rmlock @ %s:%d", file, line)); 652 if (!trylock) { 653 KASSERT(!rm_wowned(rm), 654 ("rm_rlock: wlock already held for %s @ %s:%d", 655 rm->lock_object.lo_name, file, line)); 656 WITNESS_CHECKORDER(&rm->lock_object, 657 LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL); 658 } 659 660 if (_rm_rlock(rm, tracker, trylock)) { 661 if (trylock) 662 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file, 663 line); 664 else 665 LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, 666 line); 667 WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line); 668 TD_LOCKS_INC(curthread); 669 return (1); 670 } else if (trylock) 671 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line); 672 673 return (0); 674 } 675 676 void 677 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 678 const char *file, int line) 679 { 680 681 if (SCHEDULER_STOPPED()) 682 return; 683 684 KASSERT(!rm_destroyed(rm), 685 ("rm_runlock() of destroyed rmlock @ %s:%d", file, line)); 686 _rm_assert(rm, RA_RLOCKED, file, line); 687 
WITNESS_UNLOCK(&rm->lock_object, 0, file, line); 688 LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); 689 _rm_runlock(rm, tracker); 690 TD_LOCKS_DEC(curthread); 691 } 692 693 #else 694 695 /* 696 * Just strip out file and line arguments if no lock debugging is enabled in 697 * the kernel - we are called from a kernel module. 698 */ 699 void 700 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 701 { 702 703 _rm_wlock(rm); 704 } 705 706 void 707 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 708 { 709 710 _rm_wunlock(rm); 711 } 712 713 int 714 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 715 int trylock, const char *file, int line) 716 { 717 718 return _rm_rlock(rm, tracker, trylock); 719 } 720 721 void 722 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 723 const char *file, int line) 724 { 725 726 _rm_runlock(rm, tracker); 727 } 728 729 #endif 730 731 #ifdef INVARIANT_SUPPORT 732 #ifndef INVARIANTS 733 #undef _rm_assert 734 #endif 735 736 /* 737 * Note that this does not need to use witness_assert() for read lock 738 * assertions since an exact count of read locks held by this thread 739 * is computable. 740 */ 741 void 742 _rm_assert(const struct rmlock *rm, int what, const char *file, int line) 743 { 744 int count; 745 746 if (SCHEDULER_STOPPED()) 747 return; 748 switch (what) { 749 case RA_LOCKED: 750 case RA_LOCKED | RA_RECURSED: 751 case RA_LOCKED | RA_NOTRECURSED: 752 case RA_RLOCKED: 753 case RA_RLOCKED | RA_RECURSED: 754 case RA_RLOCKED | RA_NOTRECURSED: 755 /* 756 * Handle the write-locked case. Unlike other 757 * primitives, writers can never recurse. 
758 */ 759 if (rm_wowned(rm)) { 760 if (what & RA_RLOCKED) 761 panic("Lock %s exclusively locked @ %s:%d\n", 762 rm->lock_object.lo_name, file, line); 763 if (what & RA_RECURSED) 764 panic("Lock %s not recursed @ %s:%d\n", 765 rm->lock_object.lo_name, file, line); 766 break; 767 } 768 769 critical_enter(); 770 count = rm_trackers_present(get_pcpu(), rm, curthread); 771 critical_exit(); 772 773 if (count == 0) 774 panic("Lock %s not %slocked @ %s:%d\n", 775 rm->lock_object.lo_name, (what & RA_RLOCKED) ? 776 "read " : "", file, line); 777 if (count > 1) { 778 if (what & RA_NOTRECURSED) 779 panic("Lock %s recursed @ %s:%d\n", 780 rm->lock_object.lo_name, file, line); 781 } else if (what & RA_RECURSED) 782 panic("Lock %s not recursed @ %s:%d\n", 783 rm->lock_object.lo_name, file, line); 784 break; 785 case RA_WLOCKED: 786 if (!rm_wowned(rm)) 787 panic("Lock %s not exclusively locked @ %s:%d\n", 788 rm->lock_object.lo_name, file, line); 789 break; 790 case RA_UNLOCKED: 791 if (rm_wowned(rm)) 792 panic("Lock %s exclusively locked @ %s:%d\n", 793 rm->lock_object.lo_name, file, line); 794 795 critical_enter(); 796 count = rm_trackers_present(get_pcpu(), rm, curthread); 797 critical_exit(); 798 799 if (count != 0) 800 panic("Lock %s read locked @ %s:%d\n", 801 rm->lock_object.lo_name, file, line); 802 break; 803 default: 804 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 805 line); 806 } 807 } 808 #endif /* INVARIANT_SUPPORT */ 809 810 #ifdef DDB 811 static void 812 print_tracker(struct rm_priotracker *tr) 813 { 814 struct thread *td; 815 816 td = tr->rmp_thread; 817 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 818 td->td_proc->p_pid, td->td_name); 819 if (tr->rmp_flags & RMPF_ONQUEUE) { 820 db_printf("ONQUEUE"); 821 if (tr->rmp_flags & RMPF_SIGNAL) 822 db_printf(",SIGNAL"); 823 } else 824 db_printf("0"); 825 db_printf("}\n"); 826 } 827 828 static void 829 db_show_rm(const struct lock_object *lock) 830 { 831 struct rm_priotracker *tr; 832 
struct rm_queue *queue; 833 const struct rmlock *rm; 834 struct lock_class *lc; 835 struct pcpu *pc; 836 837 rm = (const struct rmlock *)lock; 838 db_printf(" writecpus: "); 839 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 840 db_printf("\n"); 841 db_printf(" per-CPU readers:\n"); 842 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 843 for (queue = pc->pc_rm_queue.rmq_next; 844 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 845 tr = (struct rm_priotracker *)queue; 846 if (tr->rmp_rmlock == rm) 847 print_tracker(tr); 848 } 849 db_printf(" active readers:\n"); 850 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 851 print_tracker(tr); 852 lc = LOCK_CLASS(&rm->rm_wlock_object); 853 db_printf("Backing write-lock (%s):\n", lc->lc_name); 854 lc->lc_ddb_show(&rm->rm_wlock_object); 855 } 856 #endif 857 858 /* 859 * Read-mostly sleepable locks. 860 * 861 * These primitives allow both readers and writers to sleep. However, neither 862 * readers nor writers are tracked and subsequently there is no priority 863 * propagation. 864 * 865 * They are intended to be only used when write-locking is almost never needed 866 * (e.g., they can guard against unloading a kernel module) while read-locking 867 * happens all the time. 868 * 869 * Concurrent writers take turns taking the lock while going off cpu. If this is 870 * of concern for your usecase, this is not the right primitive. 871 * 872 * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are 873 * inserted to prevert reordering of generated code. Execution ordering is 874 * provided with the use of an IPI handler. 875 * 876 * No attempt is made to track which CPUs read locked at least once, 877 * consequently write locking sends IPIs to all of them. This will become a 878 * problem at some point. The easiest way to lessen it is to provide a bitmap. 
 */

/* Compiler-only barrier used to order the per-CPU influx/readers updates. */
#define	rms_int_membar()	__compiler_membar()

/*
 * Special values of rms->owner: RMS_NOOWNER when no writer holds the lock,
 * RMS_TRANSIENT while ownership is being handed from an unlocking writer to
 * a sleeping one.
 */
#define	RMS_NOOWNER	((void *)0x1)
#define	RMS_TRANSIENT	((void *)0x2)
#define	RMS_FLAGMASK	0xf

/*
 * Per-CPU state of an rmslock.
 * influx  - non-zero while a thread on this CPU is in the middle of a
 *           lock/unlock fast path.
 * readers - read locks counted on this CPU and not yet transferred to
 *           rms->readers.
 */
struct rmslock_pcpu {
	int influx;
	int readers;
};

/* The per-CPU state is allocated from pcpu_zone_8, so it must be 8 bytes. */
_Static_assert(sizeof(struct rmslock_pcpu) == 8, "bad size");

/*
 * Internal routines
 */

/* Return the current CPU's state; the caller must be in a critical section. */
static struct rmslock_pcpu *
rms_int_pcpu(struct rmslock *rms)
{

	CRITICAL_ASSERT(curthread);
	return (zpcpu_get(rms->pcpu));
}

/* Return the state belonging to CPU 'cpu'. */
static struct rmslock_pcpu *
rms_int_remote_pcpu(struct rmslock *rms, int cpu)
{

	return (zpcpu_get_cpu(rms->pcpu, cpu));
}

/* Mark the start of a fast-path operation on this CPU. */
static void
rms_int_influx_enter(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 0);
	pcpu->influx = 1;
}

/* Mark the end of a fast-path operation on this CPU. */
static void
rms_int_influx_exit(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	MPASS(pcpu->influx == 1);
	pcpu->influx = 0;
}

/*
 * With INVARIANTS, debug_readers keeps an atomically maintained total of
 * read locks so that the counts can be cross-checked; otherwise these are
 * no-ops.
 */
#ifdef INVARIANTS
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
	int old;
	old = atomic_fetchadd_int(&rms->debug_readers, 1);
	KASSERT(old >= 0, ("%s: bad readers count %d\n", __func__, old));
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
	int old;

	old = atomic_fetchadd_int(&rms->debug_readers, -1);
	KASSERT(old > 0, ("%s: bad readers count %d\n", __func__, old));
}
#else
static void
rms_int_debug_readers_inc(struct rmslock *rms)
{
}

static void
rms_int_debug_readers_dec(struct rmslock *rms)
{
}
#endif

/* Count one more reader on this CPU; requires a critical section. */
static void
rms_int_readers_inc(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_inc(rms);
	pcpu->readers++;
}

/* Count one reader less on this CPU; requires a critical section. */
static void
rms_int_readers_dec(struct rmslock *rms, struct rmslock_pcpu *pcpu)
{

	CRITICAL_ASSERT(curthread);
	rms_int_debug_readers_dec(rms);
	pcpu->readers--;
}

/*
 * Public API
 */

/*
 * Initialize 'rms': no owner, no writers or readers, a default mutex named
 * 'name', and zeroed per-CPU state.  The per-CPU allocation uses M_WAITOK.
 */
void
rms_init(struct rmslock *rms, const char *name)
{

	rms->owner = RMS_NOOWNER;
	rms->writers = 0;
	rms->readers = 0;
	rms->debug_readers = 0;
	mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
	rms->pcpu = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK | M_ZERO);
}

/*
 * Destroy 'rms', which must have no writers and no globally counted
 * readers, and free its per-CPU state.
 */
void
rms_destroy(struct rmslock *rms)
{

	MPASS(rms->writers == 0);
	MPASS(rms->readers == 0);
	mtx_destroy(&rms->mtx);
	uma_zfree_pcpu(pcpu_zone_8, rms->pcpu);
}

/*
 * Slow path of rms_rlock(), entered in a critical section with influx set
 * because a writer was observed.  Leaves the fast path, sleeps on
 * &rms->readers under the mutex until there are no writers, and then counts
 * the reader on whatever CPU the thread is running on at that point.
 */
static void __noinline
rms_rlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	while (rms->writers > 0)
		msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
	/* Still holding the mutex, so no writer can start in between. */
	critical_enter();
	rms_int_readers_inc(rms, rms_int_pcpu(rms));
	mtx_unlock(&rms->mtx);
	critical_exit();
}

/*
 * Acquire 'rms' for reading; may sleep if a writer is present.  The calling
 * thread must not be the current write owner.
 */
void
rms_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	/* influx must be set before rms->writers is examined. */
	rms_int_influx_enter(rms, pcpu);
	rms_int_membar();
	if (__predict_false(rms->writers > 0)) {
		/* The fallback clears influx and leaves the critical section. */
		rms_rlock_fallback(rms);
		return;
	}
	rms_int_membar();
	rms_int_readers_inc(rms, pcpu);
	rms_int_membar();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
}

/*
 * Try to acquire 'rms' for reading without sleeping.  Returns 1 on success
 * and 0 if a writer is present.
 */
int
rms_try_rlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	rms_int_membar();
	if (__predict_false(rms->writers > 0)) {
		rms_int_influx_exit(rms, pcpu);
		critical_exit();
		return (0);
	}
	rms_int_membar();
	rms_int_readers_inc(rms, pcpu);
	rms_int_membar();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
	return (1);
}

/*
 * Slow path of rms_runlock(), entered in a critical section with influx set
 * because a writer was observed.  The reader is expected to have been
 * transferred to the global count, which is decremented under the mutex;
 * the last reader wakes one thread sleeping on &rms->writers.
 */
static void __noinline
rms_runlock_fallback(struct rmslock *rms)
{

	rms_int_influx_exit(rms, rms_int_pcpu(rms));
	critical_exit();

	mtx_lock(&rms->mtx);
	MPASS(rms->writers > 0);
	MPASS(rms->readers > 0);
	MPASS(rms->debug_readers == rms->readers);
	rms_int_debug_readers_dec(rms);
	rms->readers--;
	if (rms->readers == 0)
		wakeup_one(&rms->writers);
	mtx_unlock(&rms->mtx);
}

/*
 * Release a read lock on 'rms'.  Without a writer the current CPU's reader
 * count is decremented; this need not be the CPU on which the lock was
 * taken, so individual per-CPU counts may go negative.
 */
void
rms_runlock(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;

	critical_enter();
	pcpu = rms_int_pcpu(rms);
	rms_int_influx_enter(rms, pcpu);
	rms_int_membar();
	if (__predict_false(rms->writers > 0)) {
		/* The fallback clears influx and leaves the critical section. */
		rms_runlock_fallback(rms);
		return;
	}
	rms_int_membar();
	rms_int_readers_dec(rms, pcpu);
	rms_int_membar();
	rms_int_influx_exit(rms, pcpu);
	critical_exit();
}

/* Argument passed to the rendezvous handlers; located via its srcra member. */
struct rmslock_ipi {
	struct rmslock *rms;
	struct smp_rendezvous_cpus_retry_arg srcra;
};

/*
 * Rendezvous action run on each CPU by rms_wlock_switch().  If a fast-path
 * operation is in progress on this CPU the handler returns without calling
 * smp_rendezvous_cpus_done().
 * NOTE(review): presumably this makes the rendezvous retry this CPU after
 * rms_wait_func() - confirm against smp_rendezvous_cpus_retry().
 * Otherwise the CPU's reader count is moved into rms->readers and the CPU
 * is reported as done.
 */
static void
rms_action_func(void *arg)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_pcpu(rms);

	if (pcpu->influx)
		return;
	if (pcpu->readers != 0) {
		atomic_add_int(&rms->readers, pcpu->readers);
		pcpu->readers = 0;
	}
	smp_rendezvous_cpus_done(arg);
}

/*
 * Rendezvous wait callback: spin until CPU 'cpu' has left its fast-path
 * operation.
 */
static void
rms_wait_func(void *arg, int cpu)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock_pcpu *pcpu;
	struct rmslock *rms;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;
	pcpu = rms_int_remote_pcpu(rms, cpu);

	while (atomic_load_int(&pcpu->influx))
		cpu_spinwait();
}

/*
 * With INVARIANTS, panic if any CPU still has a non-zero per-CPU reader
 * count; otherwise a no-op.
 */
#ifdef INVARIANTS
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
	struct rmslock_pcpu *pcpu;
	int cpu;

	CPU_FOREACH(cpu) {
		pcpu = rms_int_remote_pcpu(rms, cpu);
		if (pcpu->readers != 0) {
			panic("%s: got %d readers on cpu %d\n", __func__,
			    pcpu->readers, cpu);
		}
	}
}
#else
static void
rms_assert_no_pcpu_readers(struct rmslock *rms)
{
}
#endif

/*
 * Called by the first writer with rms->writers already set to 1: run
 * rms_action_func() on all CPUs to collect the per-CPU reader counts into
 * rms->readers.
 */
static void
rms_wlock_switch(struct rmslock *rms)
{
	struct rmslock_ipi rmsipi;

	MPASS(rms->readers == 0);
	MPASS(rms->writers == 1);

	rmsipi.rms = rms;

	smp_rendezvous_cpus_retry(all_cpus,
	    smp_no_rendezvous_barrier,
	    rms_action_func,
	    smp_no_rendezvous_barrier,
	    rms_wait_func,
	    &rmsipi.srcra);
}

/*
 * Acquire 'rms' for writing; may sleep.  The calling thread must not
 * already be the write owner.
 *
 * A writer that finds another writer present sleeps on &rms->owner until
 * ownership is handed over by rms_wunlock().  The first writer collects the
 * per-CPU reader counts and sleeps on &rms->writers until the last reader
 * has unlocked.
 */
void
rms_wlock(struct rmslock *rms)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	mtx_lock(&rms->mtx);
	rms->writers++;
	if (rms->writers > 1) {
		/* Another writer is present; wait for the handoff. */
		msleep(&rms->owner, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
		MPASS(rms->readers == 0);
		KASSERT(rms->owner == RMS_TRANSIENT,
		    ("%s: unexpected owner value %p\n", __func__,
		    rms->owner));
		goto out_grab;
	}

	KASSERT(rms->owner == RMS_NOOWNER,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));

	rms_wlock_switch(rms);
	rms_assert_no_pcpu_readers(rms);

	/* Woken by the last reader in rms_runlock_fallback(). */
	if (rms->readers > 0) {
		msleep(&rms->writers, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
	}

out_grab:
	rms->owner = curthread;
	rms_assert_no_pcpu_readers(rms);
	mtx_unlock(&rms->mtx);
	MPASS(rms->readers == 0);
}

/*
 * Release the write lock held by the current thread.  If other writers are
 * waiting, one of them is woken and the owner is marked RMS_TRANSIENT;
 * otherwise all threads sleeping on &rms->readers are woken and the owner
 * is reset to RMS_NOOWNER.
 */
void
rms_wunlock(struct rmslock *rms)
{

	mtx_lock(&rms->mtx);
	KASSERT(rms->owner == curthread,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));
	MPASS(rms->writers >= 1);
	MPASS(rms->readers == 0);
	rms->writers--;
	if (rms->writers > 0) {
		wakeup_one(&rms->owner);
		rms->owner = RMS_TRANSIENT;
	} else {
		wakeup(&rms->readers);
		rms->owner = RMS_NOOWNER;
	}
	mtx_unlock(&rms->mtx);
}

/*
 * Release 'rms' in whichever mode it is held: a write unlock if
 * rms_wowned() reports the lock as write-owned, a read unlock otherwise.
 */
void
rms_unlock(struct rmslock *rms)
{

	if (rms_wowned(rms))
		rms_wunlock(rms);
	else
		rms_runlock(rms);
}