1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Machine independent bits of reader/writer lock implementation. 
34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ddb.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 44 #include <sys/kernel.h> 45 #include <sys/kdb.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/proc.h> 50 #include <sys/rmlock.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/turnstile.h> 54 #include <sys/lock_profile.h> 55 #include <machine/cpu.h> 56 #include <vm/uma.h> 57 58 #ifdef DDB 59 #include <ddb/ddb.h> 60 #endif 61 62 /* 63 * A cookie to mark destroyed rmlocks. This is stored in the head of 64 * rm_activeReaders. 65 */ 66 #define RM_DESTROYED ((void *)0xdead) 67 68 #define rm_destroyed(rm) \ 69 (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED) 70 71 #define RMPF_ONQUEUE 1 72 #define RMPF_SIGNAL 2 73 74 #ifndef INVARIANTS 75 #define _rm_assert(c, what, file, line) 76 #endif 77 78 static void assert_rm(const struct lock_object *lock, int what); 79 #ifdef DDB 80 static void db_show_rm(const struct lock_object *lock); 81 #endif 82 static void lock_rm(struct lock_object *lock, uintptr_t how); 83 #ifdef KDTRACE_HOOKS 84 static int owner_rm(const struct lock_object *lock, struct thread **owner); 85 #endif 86 static uintptr_t unlock_rm(struct lock_object *lock); 87 88 struct lock_class lock_class_rm = { 89 .lc_name = "rm", 90 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, 91 .lc_assert = assert_rm, 92 #ifdef DDB 93 .lc_ddb_show = db_show_rm, 94 #endif 95 .lc_lock = lock_rm, 96 .lc_unlock = unlock_rm, 97 #ifdef KDTRACE_HOOKS 98 .lc_owner = owner_rm, 99 #endif 100 }; 101 102 struct lock_class lock_class_rm_sleepable = { 103 .lc_name = "sleepable rm", 104 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE, 105 .lc_assert = assert_rm, 106 #ifdef DDB 107 .lc_ddb_show = db_show_rm, 108 #endif 109 .lc_lock = lock_rm, 110 .lc_unlock = unlock_rm, 111 #ifdef KDTRACE_HOOKS 112 .lc_owner = owner_rm, 113 #endif 114 }; 115 116 static void 117 assert_rm(const 
struct lock_object *lock, int what) 118 { 119 120 rm_assert((const struct rmlock *)lock, what); 121 } 122 123 static void 124 lock_rm(struct lock_object *lock, uintptr_t how) 125 { 126 struct rmlock *rm; 127 struct rm_priotracker *tracker; 128 129 rm = (struct rmlock *)lock; 130 if (how == 0) 131 rm_wlock(rm); 132 else { 133 tracker = (struct rm_priotracker *)how; 134 rm_rlock(rm, tracker); 135 } 136 } 137 138 static uintptr_t 139 unlock_rm(struct lock_object *lock) 140 { 141 struct thread *td; 142 struct pcpu *pc; 143 struct rmlock *rm; 144 struct rm_queue *queue; 145 struct rm_priotracker *tracker; 146 uintptr_t how; 147 148 rm = (struct rmlock *)lock; 149 tracker = NULL; 150 how = 0; 151 rm_assert(rm, RA_LOCKED | RA_NOTRECURSED); 152 if (rm_wowned(rm)) 153 rm_wunlock(rm); 154 else { 155 /* 156 * Find the right rm_priotracker structure for curthread. 157 * The guarantee about its uniqueness is given by the fact 158 * we already asserted the lock wasn't recursively acquired. 159 */ 160 critical_enter(); 161 td = curthread; 162 pc = get_pcpu(); 163 for (queue = pc->pc_rm_queue.rmq_next; 164 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 165 tracker = (struct rm_priotracker *)queue; 166 if ((tracker->rmp_rmlock == rm) && 167 (tracker->rmp_thread == td)) { 168 how = (uintptr_t)tracker; 169 break; 170 } 171 } 172 KASSERT(tracker != NULL, 173 ("rm_priotracker is non-NULL when lock held in read mode")); 174 critical_exit(); 175 rm_runlock(rm, tracker); 176 } 177 return (how); 178 } 179 180 #ifdef KDTRACE_HOOKS 181 static int 182 owner_rm(const struct lock_object *lock, struct thread **owner) 183 { 184 const struct rmlock *rm; 185 struct lock_class *lc; 186 187 rm = (const struct rmlock *)lock; 188 lc = LOCK_CLASS(&rm->rm_wlock_object); 189 return (lc->lc_owner(&rm->rm_wlock_object, owner)); 190 } 191 #endif 192 193 static struct mtx rm_spinlock; 194 195 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); 196 197 /* 198 * Add or remove tracker from 
per-cpu list. 199 * 200 * The per-cpu list can be traversed at any time in forward direction from an 201 * interrupt on the *local* cpu. 202 */ 203 static void inline 204 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker) 205 { 206 struct rm_queue *next; 207 208 /* Initialize all tracker pointers */ 209 tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; 210 next = pc->pc_rm_queue.rmq_next; 211 tracker->rmp_cpuQueue.rmq_next = next; 212 213 /* rmq_prev is not used during froward traversal. */ 214 next->rmq_prev = &tracker->rmp_cpuQueue; 215 216 /* Update pointer to first element. */ 217 pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; 218 } 219 220 /* 221 * Return a count of the number of trackers the thread 'td' already 222 * has on this CPU for the lock 'rm'. 223 */ 224 static int 225 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm, 226 const struct thread *td) 227 { 228 struct rm_queue *queue; 229 struct rm_priotracker *tracker; 230 int count; 231 232 count = 0; 233 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 234 queue = queue->rmq_next) { 235 tracker = (struct rm_priotracker *)queue; 236 if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td)) 237 count++; 238 } 239 return (count); 240 } 241 242 static void inline 243 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker) 244 { 245 struct rm_queue *next, *prev; 246 247 next = tracker->rmp_cpuQueue.rmq_next; 248 prev = tracker->rmp_cpuQueue.rmq_prev; 249 250 /* Not used during forward traversal. */ 251 next->rmq_prev = prev; 252 253 /* Remove from list. 
*/ 254 prev->rmq_next = next; 255 } 256 257 static void 258 rm_cleanIPI(void *arg) 259 { 260 struct pcpu *pc; 261 struct rmlock *rm = arg; 262 struct rm_priotracker *tracker; 263 struct rm_queue *queue; 264 pc = get_pcpu(); 265 266 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 267 queue = queue->rmq_next) { 268 tracker = (struct rm_priotracker *)queue; 269 if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) { 270 tracker->rmp_flags = RMPF_ONQUEUE; 271 mtx_lock_spin(&rm_spinlock); 272 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 273 rmp_qentry); 274 mtx_unlock_spin(&rm_spinlock); 275 } 276 } 277 } 278 279 void 280 rm_init_flags(struct rmlock *rm, const char *name, int opts) 281 { 282 struct lock_class *lc; 283 int liflags, xflags; 284 285 liflags = 0; 286 if (!(opts & RM_NOWITNESS)) 287 liflags |= LO_WITNESS; 288 if (opts & RM_RECURSE) 289 liflags |= LO_RECURSABLE; 290 if (opts & RM_NEW) 291 liflags |= LO_NEW; 292 rm->rm_writecpus = all_cpus; 293 LIST_INIT(&rm->rm_activeReaders); 294 if (opts & RM_SLEEPABLE) { 295 liflags |= LO_SLEEPABLE; 296 lc = &lock_class_rm_sleepable; 297 xflags = (opts & RM_NEW ? SX_NEW : 0); 298 sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", 299 xflags | SX_NOWITNESS); 300 } else { 301 lc = &lock_class_rm; 302 xflags = (opts & RM_NEW ? 
MTX_NEW : 0); 303 mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", 304 xflags | MTX_NOWITNESS); 305 } 306 lock_init(&rm->lock_object, lc, name, NULL, liflags); 307 } 308 309 void 310 rm_init(struct rmlock *rm, const char *name) 311 { 312 313 rm_init_flags(rm, name, 0); 314 } 315 316 void 317 rm_destroy(struct rmlock *rm) 318 { 319 320 rm_assert(rm, RA_UNLOCKED); 321 LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED; 322 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 323 sx_destroy(&rm->rm_lock_sx); 324 else 325 mtx_destroy(&rm->rm_lock_mtx); 326 lock_destroy(&rm->lock_object); 327 } 328 329 int 330 rm_wowned(const struct rmlock *rm) 331 { 332 333 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 334 return (sx_xlocked(&rm->rm_lock_sx)); 335 else 336 return (mtx_owned(&rm->rm_lock_mtx)); 337 } 338 339 void 340 rm_sysinit(void *arg) 341 { 342 struct rm_args *args; 343 344 args = arg; 345 rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); 346 } 347 348 static __noinline int 349 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 350 { 351 struct pcpu *pc; 352 353 critical_enter(); 354 pc = get_pcpu(); 355 356 /* Check if we just need to do a proper critical_exit. */ 357 if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) { 358 critical_exit(); 359 return (1); 360 } 361 362 /* Remove our tracker from the per-cpu list. */ 363 rm_tracker_remove(pc, tracker); 364 365 /* Check to see if the IPI granted us the lock after all. */ 366 if (tracker->rmp_flags) { 367 /* Just add back tracker - we hold the lock. */ 368 rm_tracker_add(pc, tracker); 369 critical_exit(); 370 return (1); 371 } 372 373 /* 374 * We allow readers to acquire a lock even if a writer is blocked if 375 * the lock is recursive and the reader already holds the lock. 376 */ 377 if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { 378 /* 379 * Just grant the lock if this thread already has a tracker 380 * for this lock on the per-cpu queue. 
381 */ 382 if (rm_trackers_present(pc, rm, curthread) != 0) { 383 mtx_lock_spin(&rm_spinlock); 384 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 385 rmp_qentry); 386 tracker->rmp_flags = RMPF_ONQUEUE; 387 mtx_unlock_spin(&rm_spinlock); 388 rm_tracker_add(pc, tracker); 389 critical_exit(); 390 return (1); 391 } 392 } 393 394 sched_unpin(); 395 critical_exit(); 396 397 if (trylock) { 398 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 399 if (!sx_try_xlock(&rm->rm_lock_sx)) 400 return (0); 401 } else { 402 if (!mtx_trylock(&rm->rm_lock_mtx)) 403 return (0); 404 } 405 } else { 406 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 407 THREAD_SLEEPING_OK(); 408 sx_xlock(&rm->rm_lock_sx); 409 THREAD_NO_SLEEPING(); 410 } else 411 mtx_lock(&rm->rm_lock_mtx); 412 } 413 414 critical_enter(); 415 pc = get_pcpu(); 416 CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus); 417 rm_tracker_add(pc, tracker); 418 sched_pin(); 419 critical_exit(); 420 421 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 422 sx_xunlock(&rm->rm_lock_sx); 423 else 424 mtx_unlock(&rm->rm_lock_mtx); 425 426 return (1); 427 } 428 429 int 430 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 431 { 432 struct thread *td = curthread; 433 struct pcpu *pc; 434 435 if (SCHEDULER_STOPPED()) 436 return (1); 437 438 tracker->rmp_flags = 0; 439 tracker->rmp_thread = td; 440 tracker->rmp_rmlock = rm; 441 442 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 443 THREAD_NO_SLEEPING(); 444 445 td->td_critnest++; /* critical_enter(); */ 446 447 __compiler_membar(); 448 449 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 450 451 rm_tracker_add(pc, tracker); 452 453 sched_pin(); 454 455 __compiler_membar(); 456 457 td->td_critnest--; 458 459 /* 460 * Fast path to combine two common conditions into a single 461 * conditional jump. 
462 */ 463 if (__predict_true(0 == (td->td_owepreempt | 464 CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))) 465 return (1); 466 467 /* We do not have a read token and need to acquire one. */ 468 return _rm_rlock_hard(rm, tracker, trylock); 469 } 470 471 static __noinline void 472 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker) 473 { 474 475 if (td->td_owepreempt) { 476 td->td_critnest++; 477 critical_exit(); 478 } 479 480 if (!tracker->rmp_flags) 481 return; 482 483 mtx_lock_spin(&rm_spinlock); 484 LIST_REMOVE(tracker, rmp_qentry); 485 486 if (tracker->rmp_flags & RMPF_SIGNAL) { 487 struct rmlock *rm; 488 struct turnstile *ts; 489 490 rm = tracker->rmp_rmlock; 491 492 turnstile_chain_lock(&rm->lock_object); 493 mtx_unlock_spin(&rm_spinlock); 494 495 ts = turnstile_lookup(&rm->lock_object); 496 497 turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); 498 turnstile_unpend(ts); 499 turnstile_chain_unlock(&rm->lock_object); 500 } else 501 mtx_unlock_spin(&rm_spinlock); 502 } 503 504 void 505 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) 506 { 507 struct pcpu *pc; 508 struct thread *td = tracker->rmp_thread; 509 510 if (SCHEDULER_STOPPED()) 511 return; 512 513 td->td_critnest++; /* critical_enter(); */ 514 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 515 rm_tracker_remove(pc, tracker); 516 td->td_critnest--; 517 sched_unpin(); 518 519 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 520 THREAD_SLEEPING_OK(); 521 522 if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags))) 523 return; 524 525 _rm_unlock_hard(td, tracker); 526 } 527 528 void 529 _rm_wlock(struct rmlock *rm) 530 { 531 struct rm_priotracker *prio; 532 struct turnstile *ts; 533 cpuset_t readcpus; 534 535 if (SCHEDULER_STOPPED()) 536 return; 537 538 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 539 sx_xlock(&rm->rm_lock_sx); 540 else 541 mtx_lock(&rm->rm_lock_mtx); 542 543 if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { 544 /* Get all read tokens back */ 545 
readcpus = all_cpus; 546 CPU_ANDNOT(&readcpus, &rm->rm_writecpus); 547 rm->rm_writecpus = all_cpus; 548 549 /* 550 * Assumes rm->rm_writecpus update is visible on other CPUs 551 * before rm_cleanIPI is called. 552 */ 553 #ifdef SMP 554 smp_rendezvous_cpus(readcpus, 555 smp_no_rendezvous_barrier, 556 rm_cleanIPI, 557 smp_no_rendezvous_barrier, 558 rm); 559 560 #else 561 rm_cleanIPI(rm); 562 #endif 563 564 mtx_lock_spin(&rm_spinlock); 565 while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { 566 ts = turnstile_trywait(&rm->lock_object); 567 prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; 568 mtx_unlock_spin(&rm_spinlock); 569 turnstile_wait(ts, prio->rmp_thread, 570 TS_EXCLUSIVE_QUEUE); 571 mtx_lock_spin(&rm_spinlock); 572 } 573 mtx_unlock_spin(&rm_spinlock); 574 } 575 } 576 577 void 578 _rm_wunlock(struct rmlock *rm) 579 { 580 581 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 582 sx_xunlock(&rm->rm_lock_sx); 583 else 584 mtx_unlock(&rm->rm_lock_mtx); 585 } 586 587 #if LOCK_DEBUG > 0 588 589 void 590 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 591 { 592 593 if (SCHEDULER_STOPPED()) 594 return; 595 596 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 597 ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d", 598 curthread, rm->lock_object.lo_name, file, line)); 599 KASSERT(!rm_destroyed(rm), 600 ("rm_wlock() of destroyed rmlock @ %s:%d", file, line)); 601 _rm_assert(rm, RA_UNLOCKED, file, line); 602 603 WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, 604 file, line, NULL); 605 606 _rm_wlock(rm); 607 608 LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); 609 WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 610 TD_LOCKS_INC(curthread); 611 } 612 613 void 614 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 615 { 616 617 if (SCHEDULER_STOPPED()) 618 return; 619 620 KASSERT(!rm_destroyed(rm), 621 ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line)); 622 _rm_assert(rm, RA_WLOCKED, 
file, line); 623 WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 624 LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); 625 _rm_wunlock(rm); 626 TD_LOCKS_DEC(curthread); 627 } 628 629 int 630 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 631 int trylock, const char *file, int line) 632 { 633 634 if (SCHEDULER_STOPPED()) 635 return (1); 636 637 #ifdef INVARIANTS 638 if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) { 639 critical_enter(); 640 KASSERT(rm_trackers_present(get_pcpu(), rm, 641 curthread) == 0, 642 ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n", 643 rm->lock_object.lo_name, file, line)); 644 critical_exit(); 645 } 646 #endif 647 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 648 ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d", 649 curthread, rm->lock_object.lo_name, file, line)); 650 KASSERT(!rm_destroyed(rm), 651 ("rm_rlock() of destroyed rmlock @ %s:%d", file, line)); 652 if (!trylock) { 653 KASSERT(!rm_wowned(rm), 654 ("rm_rlock: wlock already held for %s @ %s:%d", 655 rm->lock_object.lo_name, file, line)); 656 WITNESS_CHECKORDER(&rm->lock_object, 657 LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL); 658 } 659 660 if (_rm_rlock(rm, tracker, trylock)) { 661 if (trylock) 662 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file, 663 line); 664 else 665 LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, 666 line); 667 WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line); 668 TD_LOCKS_INC(curthread); 669 return (1); 670 } else if (trylock) 671 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line); 672 673 return (0); 674 } 675 676 void 677 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 678 const char *file, int line) 679 { 680 681 if (SCHEDULER_STOPPED()) 682 return; 683 684 KASSERT(!rm_destroyed(rm), 685 ("rm_runlock() of destroyed rmlock @ %s:%d", file, line)); 686 _rm_assert(rm, RA_RLOCKED, file, line); 687 
WITNESS_UNLOCK(&rm->lock_object, 0, file, line); 688 LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); 689 _rm_runlock(rm, tracker); 690 TD_LOCKS_DEC(curthread); 691 } 692 693 #else 694 695 /* 696 * Just strip out file and line arguments if no lock debugging is enabled in 697 * the kernel - we are called from a kernel module. 698 */ 699 void 700 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 701 { 702 703 _rm_wlock(rm); 704 } 705 706 void 707 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 708 { 709 710 _rm_wunlock(rm); 711 } 712 713 int 714 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 715 int trylock, const char *file, int line) 716 { 717 718 return _rm_rlock(rm, tracker, trylock); 719 } 720 721 void 722 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 723 const char *file, int line) 724 { 725 726 _rm_runlock(rm, tracker); 727 } 728 729 #endif 730 731 #ifdef INVARIANT_SUPPORT 732 #ifndef INVARIANTS 733 #undef _rm_assert 734 #endif 735 736 /* 737 * Note that this does not need to use witness_assert() for read lock 738 * assertions since an exact count of read locks held by this thread 739 * is computable. 740 */ 741 void 742 _rm_assert(const struct rmlock *rm, int what, const char *file, int line) 743 { 744 int count; 745 746 if (SCHEDULER_STOPPED()) 747 return; 748 switch (what) { 749 case RA_LOCKED: 750 case RA_LOCKED | RA_RECURSED: 751 case RA_LOCKED | RA_NOTRECURSED: 752 case RA_RLOCKED: 753 case RA_RLOCKED | RA_RECURSED: 754 case RA_RLOCKED | RA_NOTRECURSED: 755 /* 756 * Handle the write-locked case. Unlike other 757 * primitives, writers can never recurse. 
758 */ 759 if (rm_wowned(rm)) { 760 if (what & RA_RLOCKED) 761 panic("Lock %s exclusively locked @ %s:%d\n", 762 rm->lock_object.lo_name, file, line); 763 if (what & RA_RECURSED) 764 panic("Lock %s not recursed @ %s:%d\n", 765 rm->lock_object.lo_name, file, line); 766 break; 767 } 768 769 critical_enter(); 770 count = rm_trackers_present(get_pcpu(), rm, curthread); 771 critical_exit(); 772 773 if (count == 0) 774 panic("Lock %s not %slocked @ %s:%d\n", 775 rm->lock_object.lo_name, (what & RA_RLOCKED) ? 776 "read " : "", file, line); 777 if (count > 1) { 778 if (what & RA_NOTRECURSED) 779 panic("Lock %s recursed @ %s:%d\n", 780 rm->lock_object.lo_name, file, line); 781 } else if (what & RA_RECURSED) 782 panic("Lock %s not recursed @ %s:%d\n", 783 rm->lock_object.lo_name, file, line); 784 break; 785 case RA_WLOCKED: 786 if (!rm_wowned(rm)) 787 panic("Lock %s not exclusively locked @ %s:%d\n", 788 rm->lock_object.lo_name, file, line); 789 break; 790 case RA_UNLOCKED: 791 if (rm_wowned(rm)) 792 panic("Lock %s exclusively locked @ %s:%d\n", 793 rm->lock_object.lo_name, file, line); 794 795 critical_enter(); 796 count = rm_trackers_present(get_pcpu(), rm, curthread); 797 critical_exit(); 798 799 if (count != 0) 800 panic("Lock %s read locked @ %s:%d\n", 801 rm->lock_object.lo_name, file, line); 802 break; 803 default: 804 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 805 line); 806 } 807 } 808 #endif /* INVARIANT_SUPPORT */ 809 810 #ifdef DDB 811 static void 812 print_tracker(struct rm_priotracker *tr) 813 { 814 struct thread *td; 815 816 td = tr->rmp_thread; 817 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 818 td->td_proc->p_pid, td->td_name); 819 if (tr->rmp_flags & RMPF_ONQUEUE) { 820 db_printf("ONQUEUE"); 821 if (tr->rmp_flags & RMPF_SIGNAL) 822 db_printf(",SIGNAL"); 823 } else 824 db_printf("0"); 825 db_printf("}\n"); 826 } 827 828 static void 829 db_show_rm(const struct lock_object *lock) 830 { 831 struct rm_priotracker *tr; 832 
struct rm_queue *queue; 833 const struct rmlock *rm; 834 struct lock_class *lc; 835 struct pcpu *pc; 836 837 rm = (const struct rmlock *)lock; 838 db_printf(" writecpus: "); 839 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 840 db_printf("\n"); 841 db_printf(" per-CPU readers:\n"); 842 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 843 for (queue = pc->pc_rm_queue.rmq_next; 844 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 845 tr = (struct rm_priotracker *)queue; 846 if (tr->rmp_rmlock == rm) 847 print_tracker(tr); 848 } 849 db_printf(" active readers:\n"); 850 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 851 print_tracker(tr); 852 lc = LOCK_CLASS(&rm->rm_wlock_object); 853 db_printf("Backing write-lock (%s):\n", lc->lc_name); 854 lc->lc_ddb_show(&rm->rm_wlock_object); 855 } 856 #endif 857 858 /* 859 * Read-mostly sleepable locks. 860 * 861 * These primitives allow both readers and writers to sleep. However, neither 862 * readers nor writers are tracked and subsequently there is no priority 863 * propagation. 864 * 865 * They are intended to be only used when write-locking is almost never needed 866 * (e.g., they can guard against unloading a kernel module) while read-locking 867 * happens all the time. 868 * 869 * Concurrent writers take turns taking the lock while going off cpu. If this is 870 * of concern for your usecase, this is not the right primitive. 871 * 872 * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are 873 * inserted to prevert reordering of generated code. Execution ordering is 874 * provided with the use of an IPI handler. 875 * 876 * No attempt is made to track which CPUs read locked at least once, 877 * consequently write locking sends IPIs to all of them. This will become a 878 * problem at some point. The easiest way to lessen it is to provide a bitmap. 
*/

/* Sentinel values stored in rms->owner when no thread owns the lock. */
#define	RMS_NOOWNER	((void *)0x1)	/* no writer present */
#define	RMS_TRANSIENT	((void *)0x2)	/* being handed to a waiting writer */
#define	RMS_FLAGMASK	0xf

/*
 * Initialize 'rms': no owner, zero writer/reader counts, a default mutex
 * named 'name', and two zeroed per-CPU int counters (readers_pcpu and
 * readers_influx).  The allocations use M_WAITOK.
 */
void
rms_init(struct rmslock *rms, const char *name)
{

	rms->owner = RMS_NOOWNER;
	rms->writers = 0;
	rms->readers = 0;
	mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
	rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
	rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
}

/*
 * Tear down 'rms'.  The shared writer and reader counts are asserted to be
 * zero; the mutex and both per-CPU counters are released.
 */
void
rms_destroy(struct rmslock *rms)
{

	MPASS(rms->writers == 0);
	MPASS(rms->readers == 0);
	mtx_destroy(&rms->mtx);
	uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu);
	uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx);
}

/*
 * Slow path of rms_rlock(), entered with the caller's critical section
 * still open and readers_influx set.  Clears the in-flux marker, leaves
 * the critical section, then under rms->mtx sleeps on &rms->readers until
 * no writer remains and finally counts the reader in the per-CPU counter.
 */
static void __noinline
rms_rlock_fallback(struct rmslock *rms)
{

	zpcpu_set_protected(rms->readers_influx, 0);
	critical_exit();

	mtx_lock(&rms->mtx);
	MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
	while (rms->writers > 0)
		msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
	critical_enter();
	zpcpu_add_protected(rms->readers_pcpu, 1);
	mtx_unlock(&rms->mtx);
	critical_exit();
}

/*
 * Acquire 'rms' for reading; may sleep.
 *
 * Fast path, all inside one critical section with compiler barriers
 * between the steps: mark this CPU as in flux, check for writers, bump the
 * per-CPU reader count, clear the in-flux marker.  If a writer is present
 * the work is handed to rms_rlock_fallback(), which also closes the
 * critical section.
 */
void
rms_rlock(struct rmslock *rms)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	zpcpu_set_protected(rms->readers_influx, 1);
	__compiler_membar();
	if (__predict_false(rms->writers > 0)) {
		rms_rlock_fallback(rms);
		return;
	}
	__compiler_membar();
	zpcpu_add_protected(rms->readers_pcpu, 1);
	__compiler_membar();
	zpcpu_set_protected(rms->readers_influx, 0);
	critical_exit();
}

/*
 * Non-sleeping variant of rms_rlock().  Returns 1 when the read lock was
 * taken and 0 when a writer is present.
 */
int
rms_try_rlock(struct rmslock *rms)
{

	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	critical_enter();
	zpcpu_set_protected(rms->readers_influx, 1);
	__compiler_membar();
	if (__predict_false(rms->writers > 0)) {
		__compiler_membar();
		zpcpu_set_protected(rms->readers_influx, 0);
		critical_exit();
		return (0);
	}
	__compiler_membar();
	zpcpu_add_protected(rms->readers_pcpu, 1);
	__compiler_membar();
	zpcpu_set_protected(rms->readers_influx, 0);
	critical_exit();
	return (1);
}

/*
 * Slow path of rms_runlock(), entered with the caller's critical section
 * open and readers_influx set, when a writer is present.  The read lock is
 * dropped from the shared rms->readers count under rms->mtx, and the
 * thread sleeping on &rms->writers is woken when that count reaches zero.
 */
static void __noinline
rms_runlock_fallback(struct rmslock *rms)
{

	zpcpu_set_protected(rms->readers_influx, 0);
	critical_exit();

	mtx_lock(&rms->mtx);
	MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
	MPASS(rms->writers > 0);
	MPASS(rms->readers > 0);
	rms->readers--;
	if (rms->readers == 0)
		wakeup_one(&rms->writers);
	mtx_unlock(&rms->mtx);
}

/*
 * Release a read lock on 'rms'.  Mirrors rms_rlock(): with no writer
 * present the per-CPU reader count is decremented inside a critical
 * section; otherwise rms_runlock_fallback() takes over.
 */
void
rms_runlock(struct rmslock *rms)
{

	critical_enter();
	zpcpu_set_protected(rms->readers_influx, 1);
	__compiler_membar();
	if (__predict_false(rms->writers > 0)) {
		rms_runlock_fallback(rms);
		return;
	}
	__compiler_membar();
	zpcpu_sub_protected(rms->readers_pcpu, 1);
	__compiler_membar();
	zpcpu_set_protected(rms->readers_influx, 0);
	critical_exit();
}

/*
 * Argument bundle for the write-lock rendezvous.  The callbacks receive a
 * pointer to 'srcra' and recover the enclosing structure with
 * __containerof().
 */
struct rmslock_ipi {
	struct rmslock *rms;
	struct smp_rendezvous_cpus_retry_arg srcra;
};

/*
 * Rendezvous action run on each CPU.  If the CPU is in the middle of a
 * read lock/unlock (readers_influx set) it returns without reporting
 * completion -- presumably so that smp_rendezvous_cpus_retry() retries
 * this CPU.  Otherwise the CPU's reader count is moved into the shared
 * rms->readers and completion is reported.
 */
static void
rms_action_func(void *arg)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock *rms;
	int readers;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;

	if (*zpcpu_get(rms->readers_influx))
		return;
	readers = zpcpu_replace(rms->readers_pcpu, 0);
	if (readers != 0)
		atomic_add_int(&rms->readers, readers);
	smp_rendezvous_cpus_done(arg);
}

/*
 * Rendezvous wait callback: spin until the readers_influx marker of CPU
 * 'cpu' reads as zero.
 */
static void
rms_wait_func(void *arg, int cpu)
{
	struct rmslock_ipi *rmsipi;
	struct rmslock *rms;
	int *in_op;

	rmsipi = __containerof(arg, struct rmslock_ipi, srcra);
	rms = rmsipi->rms;

	in_op = zpcpu_get_cpu(rms->readers_influx, cpu);
	while (atomic_load_int(in_op))
		cpu_spinwait();
}

/*
 * Called by the first writer: run rms_action_func() on all CPUs so that
 * every per-CPU reader count ends up in rms->readers.
 */
static void
rms_wlock_switch(struct rmslock *rms)
{
	struct rmslock_ipi rmsipi;

	MPASS(rms->readers == 0);
	MPASS(rms->writers == 1);

	rmsipi.rms = rms;

	smp_rendezvous_cpus_retry(all_cpus,
	    smp_no_rendezvous_barrier,
	    rms_action_func,
	    smp_no_rendezvous_barrier,
	    rms_wait_func,
	    &rmsipi.srcra);
}

/*
 * Acquire 'rms' for writing; may sleep.
 *
 * The first writer (writers becomes 1) collects the per-CPU reader counts
 * and, if readers remain, sleeps on &rms->writers until the last one
 * leaves.  Any further writer sleeps on &rms->owner and is handed the lock
 * by rms_wunlock(), which marks the owner RMS_TRANSIENT for the hand-off.
 */
void
rms_wlock(struct rmslock *rms)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
	MPASS(atomic_load_ptr(&rms->owner) != curthread);

	mtx_lock(&rms->mtx);
	rms->writers++;
	if (rms->writers > 1) {
		msleep(&rms->owner, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
		MPASS(rms->readers == 0);
		KASSERT(rms->owner == RMS_TRANSIENT,
		    ("%s: unexpected owner value %p\n", __func__,
		    rms->owner));
		goto out_grab;
	}

	KASSERT(rms->owner == RMS_NOOWNER,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));

	rms_wlock_switch(rms);

	if (rms->readers > 0) {
		msleep(&rms->writers, &rms->mtx, (PUSER - 1),
		    mtx_name(&rms->mtx), 0);
	}

out_grab:
	rms->owner = curthread;
	mtx_unlock(&rms->mtx);
	MPASS(rms->readers == 0);
}

/*
 * Release the write lock.  If other writers are queued, one of them is
 * woken and the owner is set to RMS_TRANSIENT; otherwise all readers
 * sleeping on &rms->readers are woken and the owner becomes RMS_NOOWNER.
 */
void
rms_wunlock(struct rmslock *rms)
{

	mtx_lock(&rms->mtx);
	KASSERT(rms->owner == curthread,
	    ("%s: unexpected owner value %p\n", __func__, rms->owner));
	MPASS(rms->writers >= 1);
	MPASS(rms->readers == 0);
	rms->writers--;
	if (rms->writers > 0) {
		wakeup_one(&rms->owner);
		rms->owner = RMS_TRANSIENT;
	} else {
		wakeup(&rms->readers);
		rms->owner = RMS_NOOWNER;
	}
	mtx_unlock(&rms->mtx);
}

/*
 * Release 'rms' in whichever mode it is held, chosen by rms_wowned().
 */
void
rms_unlock(struct rmslock *rms)
{

	if (rms_wowned(rms))
		rms_wunlock(rms);
	else
		rms_runlock(rms);
}