1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Machine independent bits of reader/writer lock implementation. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ddb.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 44 #include <sys/kernel.h> 45 #include <sys/kdb.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/proc.h> 50 #include <sys/rmlock.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/turnstile.h> 54 #include <sys/lock_profile.h> 55 #include <machine/cpu.h> 56 #include <vm/uma.h> 57 58 #ifdef DDB 59 #include <ddb/ddb.h> 60 #endif 61 62 /* 63 * A cookie to mark destroyed rmlocks. This is stored in the head of 64 * rm_activeReaders. 65 */ 66 #define RM_DESTROYED ((void *)0xdead) 67 68 #define rm_destroyed(rm) \ 69 (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED) 70 71 #define RMPF_ONQUEUE 1 72 #define RMPF_SIGNAL 2 73 74 #ifndef INVARIANTS 75 #define _rm_assert(c, what, file, line) 76 #endif 77 78 static void assert_rm(const struct lock_object *lock, int what); 79 #ifdef DDB 80 static void db_show_rm(const struct lock_object *lock); 81 #endif 82 static void lock_rm(struct lock_object *lock, uintptr_t how); 83 #ifdef KDTRACE_HOOKS 84 static int owner_rm(const struct lock_object *lock, struct thread **owner); 85 #endif 86 static uintptr_t unlock_rm(struct lock_object *lock); 87 88 struct lock_class lock_class_rm = { 89 .lc_name = "rm", 90 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, 91 .lc_assert = assert_rm, 92 #ifdef DDB 93 .lc_ddb_show = db_show_rm, 94 #endif 95 .lc_lock = lock_rm, 96 .lc_unlock = unlock_rm, 97 #ifdef KDTRACE_HOOKS 98 .lc_owner = owner_rm, 99 #endif 100 }; 101 102 struct lock_class lock_class_rm_sleepable = { 103 .lc_name = "sleepable rm", 104 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE, 105 .lc_assert = assert_rm, 106 #ifdef DDB 107 .lc_ddb_show = db_show_rm, 108 #endif 109 .lc_lock = lock_rm, 110 .lc_unlock = unlock_rm, 111 #ifdef KDTRACE_HOOKS 112 .lc_owner = owner_rm, 113 #endif 114 }; 115 116 static void 117 assert_rm(const struct lock_object *lock, int what) 118 { 119 120 rm_assert((const struct rmlock *)lock, what); 121 } 122 123 static void 124 lock_rm(struct lock_object *lock, uintptr_t how) 125 { 126 struct rmlock *rm; 127 struct rm_priotracker *tracker; 128 129 rm = (struct rmlock *)lock; 130 if (how == 0) 131 rm_wlock(rm); 132 else { 133 tracker = (struct rm_priotracker *)how; 134 rm_rlock(rm, tracker); 135 } 136 } 137 138 static uintptr_t 139 unlock_rm(struct lock_object *lock) 140 { 141 struct thread *td; 142 struct pcpu *pc; 143 struct rmlock *rm; 144 struct rm_queue *queue; 145 struct rm_priotracker *tracker; 146 uintptr_t how; 147 148 rm = (struct rmlock *)lock; 149 tracker = NULL; 150 how = 0; 151 rm_assert(rm, RA_LOCKED | RA_NOTRECURSED); 152 if (rm_wowned(rm)) 153 rm_wunlock(rm); 154 else { 155 /* 156 * Find the right rm_priotracker structure for curthread. 157 * The guarantee about its uniqueness is given by the fact 158 * we already asserted the lock wasn't recursively acquired. 159 */ 160 critical_enter(); 161 td = curthread; 162 pc = get_pcpu(); 163 for (queue = pc->pc_rm_queue.rmq_next; 164 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 165 tracker = (struct rm_priotracker *)queue; 166 if ((tracker->rmp_rmlock == rm) && 167 (tracker->rmp_thread == td)) { 168 how = (uintptr_t)tracker; 169 break; 170 } 171 } 172 KASSERT(tracker != NULL, 173 ("rm_priotracker is non-NULL when lock held in read mode")); 174 critical_exit(); 175 rm_runlock(rm, tracker); 176 } 177 return (how); 178 } 179 180 #ifdef KDTRACE_HOOKS 181 static int 182 owner_rm(const struct lock_object *lock, struct thread **owner) 183 { 184 const struct rmlock *rm; 185 struct lock_class *lc; 186 187 rm = (const struct rmlock *)lock; 188 lc = LOCK_CLASS(&rm->rm_wlock_object); 189 return (lc->lc_owner(&rm->rm_wlock_object, owner)); 190 } 191 #endif 192 193 static struct mtx rm_spinlock; 194 195 MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); 196 197 /* 198 * Add or remove tracker from per-cpu list. 199 * 200 * The per-cpu list can be traversed at any time in forward direction from an 201 * interrupt on the *local* cpu. 202 */ 203 static void inline 204 rm_tracker_add(struct pcpu *pc, struct rm_priotracker *tracker) 205 { 206 struct rm_queue *next; 207 208 /* Initialize all tracker pointers */ 209 tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; 210 next = pc->pc_rm_queue.rmq_next; 211 tracker->rmp_cpuQueue.rmq_next = next; 212 213 /* rmq_prev is not used during froward traversal. */ 214 next->rmq_prev = &tracker->rmp_cpuQueue; 215 216 /* Update pointer to first element. */ 217 pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; 218 } 219 220 /* 221 * Return a count of the number of trackers the thread 'td' already 222 * has on this CPU for the lock 'rm'. 223 */ 224 static int 225 rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm, 226 const struct thread *td) 227 { 228 struct rm_queue *queue; 229 struct rm_priotracker *tracker; 230 int count; 231 232 count = 0; 233 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 234 queue = queue->rmq_next) { 235 tracker = (struct rm_priotracker *)queue; 236 if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td)) 237 count++; 238 } 239 return (count); 240 } 241 242 static void inline 243 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker) 244 { 245 struct rm_queue *next, *prev; 246 247 next = tracker->rmp_cpuQueue.rmq_next; 248 prev = tracker->rmp_cpuQueue.rmq_prev; 249 250 /* Not used during forward traversal. */ 251 next->rmq_prev = prev; 252 253 /* Remove from list. */ 254 prev->rmq_next = next; 255 } 256 257 static void 258 rm_cleanIPI(void *arg) 259 { 260 struct pcpu *pc; 261 struct rmlock *rm = arg; 262 struct rm_priotracker *tracker; 263 struct rm_queue *queue; 264 pc = get_pcpu(); 265 266 for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue; 267 queue = queue->rmq_next) { 268 tracker = (struct rm_priotracker *)queue; 269 if (tracker->rmp_rmlock == rm && tracker->rmp_flags == 0) { 270 tracker->rmp_flags = RMPF_ONQUEUE; 271 mtx_lock_spin(&rm_spinlock); 272 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 273 rmp_qentry); 274 mtx_unlock_spin(&rm_spinlock); 275 } 276 } 277 } 278 279 void 280 rm_init_flags(struct rmlock *rm, const char *name, int opts) 281 { 282 struct lock_class *lc; 283 int liflags, xflags; 284 285 liflags = 0; 286 if (!(opts & RM_NOWITNESS)) 287 liflags |= LO_WITNESS; 288 if (opts & RM_RECURSE) 289 liflags |= LO_RECURSABLE; 290 if (opts & RM_NEW) 291 liflags |= LO_NEW; 292 rm->rm_writecpus = all_cpus; 293 LIST_INIT(&rm->rm_activeReaders); 294 if (opts & RM_SLEEPABLE) { 295 liflags |= LO_SLEEPABLE; 296 lc = &lock_class_rm_sleepable; 297 xflags = (opts & RM_NEW ? SX_NEW : 0); 298 sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", 299 xflags | SX_NOWITNESS); 300 } else { 301 lc = &lock_class_rm; 302 xflags = (opts & RM_NEW ? MTX_NEW : 0); 303 mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", 304 xflags | MTX_NOWITNESS); 305 } 306 lock_init(&rm->lock_object, lc, name, NULL, liflags); 307 } 308 309 void 310 rm_init(struct rmlock *rm, const char *name) 311 { 312 313 rm_init_flags(rm, name, 0); 314 } 315 316 void 317 rm_destroy(struct rmlock *rm) 318 { 319 320 rm_assert(rm, RA_UNLOCKED); 321 LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED; 322 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 323 sx_destroy(&rm->rm_lock_sx); 324 else 325 mtx_destroy(&rm->rm_lock_mtx); 326 lock_destroy(&rm->lock_object); 327 } 328 329 int 330 rm_wowned(const struct rmlock *rm) 331 { 332 333 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 334 return (sx_xlocked(&rm->rm_lock_sx)); 335 else 336 return (mtx_owned(&rm->rm_lock_mtx)); 337 } 338 339 void 340 rm_sysinit(void *arg) 341 { 342 struct rm_args *args; 343 344 args = arg; 345 rm_init_flags(args->ra_rm, args->ra_desc, args->ra_flags); 346 } 347 348 static __noinline int 349 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 350 { 351 struct pcpu *pc; 352 353 critical_enter(); 354 pc = get_pcpu(); 355 356 /* Check if we just need to do a proper critical_exit. */ 357 if (!CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)) { 358 critical_exit(); 359 return (1); 360 } 361 362 /* Remove our tracker from the per-cpu list. */ 363 rm_tracker_remove(pc, tracker); 364 365 /* Check to see if the IPI granted us the lock after all. */ 366 if (tracker->rmp_flags) { 367 /* Just add back tracker - we hold the lock. */ 368 rm_tracker_add(pc, tracker); 369 critical_exit(); 370 return (1); 371 } 372 373 /* 374 * We allow readers to acquire a lock even if a writer is blocked if 375 * the lock is recursive and the reader already holds the lock. 376 */ 377 if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { 378 /* 379 * Just grant the lock if this thread already has a tracker 380 * for this lock on the per-cpu queue. 381 */ 382 if (rm_trackers_present(pc, rm, curthread) != 0) { 383 mtx_lock_spin(&rm_spinlock); 384 LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker, 385 rmp_qentry); 386 tracker->rmp_flags = RMPF_ONQUEUE; 387 mtx_unlock_spin(&rm_spinlock); 388 rm_tracker_add(pc, tracker); 389 critical_exit(); 390 return (1); 391 } 392 } 393 394 sched_unpin(); 395 critical_exit(); 396 397 if (trylock) { 398 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 399 if (!sx_try_xlock(&rm->rm_lock_sx)) 400 return (0); 401 } else { 402 if (!mtx_trylock(&rm->rm_lock_mtx)) 403 return (0); 404 } 405 } else { 406 if (rm->lock_object.lo_flags & LO_SLEEPABLE) { 407 THREAD_SLEEPING_OK(); 408 sx_xlock(&rm->rm_lock_sx); 409 THREAD_NO_SLEEPING(); 410 } else 411 mtx_lock(&rm->rm_lock_mtx); 412 } 413 414 critical_enter(); 415 pc = get_pcpu(); 416 CPU_CLR(pc->pc_cpuid, &rm->rm_writecpus); 417 rm_tracker_add(pc, tracker); 418 sched_pin(); 419 critical_exit(); 420 421 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 422 sx_xunlock(&rm->rm_lock_sx); 423 else 424 mtx_unlock(&rm->rm_lock_mtx); 425 426 return (1); 427 } 428 429 int 430 _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) 431 { 432 struct thread *td = curthread; 433 struct pcpu *pc; 434 435 if (SCHEDULER_STOPPED()) 436 return (1); 437 438 tracker->rmp_flags = 0; 439 tracker->rmp_thread = td; 440 tracker->rmp_rmlock = rm; 441 442 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 443 THREAD_NO_SLEEPING(); 444 445 td->td_critnest++; /* critical_enter(); */ 446 447 __compiler_membar(); 448 449 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 450 451 rm_tracker_add(pc, tracker); 452 453 sched_pin(); 454 455 __compiler_membar(); 456 457 td->td_critnest--; 458 459 /* 460 * Fast path to combine two common conditions into a single 461 * conditional jump. 462 */ 463 if (__predict_true(0 == (td->td_owepreempt | 464 CPU_ISSET(pc->pc_cpuid, &rm->rm_writecpus)))) 465 return (1); 466 467 /* We do not have a read token and need to acquire one. */ 468 return _rm_rlock_hard(rm, tracker, trylock); 469 } 470 471 static __noinline void 472 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker) 473 { 474 475 if (td->td_owepreempt) { 476 td->td_critnest++; 477 critical_exit(); 478 } 479 480 if (!tracker->rmp_flags) 481 return; 482 483 mtx_lock_spin(&rm_spinlock); 484 LIST_REMOVE(tracker, rmp_qentry); 485 486 if (tracker->rmp_flags & RMPF_SIGNAL) { 487 struct rmlock *rm; 488 struct turnstile *ts; 489 490 rm = tracker->rmp_rmlock; 491 492 turnstile_chain_lock(&rm->lock_object); 493 mtx_unlock_spin(&rm_spinlock); 494 495 ts = turnstile_lookup(&rm->lock_object); 496 497 turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); 498 turnstile_unpend(ts); 499 turnstile_chain_unlock(&rm->lock_object); 500 } else 501 mtx_unlock_spin(&rm_spinlock); 502 } 503 504 void 505 _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) 506 { 507 struct pcpu *pc; 508 struct thread *td = tracker->rmp_thread; 509 510 if (SCHEDULER_STOPPED()) 511 return; 512 513 td->td_critnest++; /* critical_enter(); */ 514 pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ 515 rm_tracker_remove(pc, tracker); 516 td->td_critnest--; 517 sched_unpin(); 518 519 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 520 THREAD_SLEEPING_OK(); 521 522 if (__predict_true(0 == (td->td_owepreempt | tracker->rmp_flags))) 523 return; 524 525 _rm_unlock_hard(td, tracker); 526 } 527 528 void 529 _rm_wlock(struct rmlock *rm) 530 { 531 struct rm_priotracker *prio; 532 struct turnstile *ts; 533 cpuset_t readcpus; 534 535 if (SCHEDULER_STOPPED()) 536 return; 537 538 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 539 sx_xlock(&rm->rm_lock_sx); 540 else 541 mtx_lock(&rm->rm_lock_mtx); 542 543 if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { 544 /* Get all read tokens back */ 545 readcpus = all_cpus; 546 CPU_ANDNOT(&readcpus, &rm->rm_writecpus); 547 rm->rm_writecpus = all_cpus; 548 549 /* 550 * Assumes rm->rm_writecpus update is visible on other CPUs 551 * before rm_cleanIPI is called. 552 */ 553 #ifdef SMP 554 smp_rendezvous_cpus(readcpus, 555 smp_no_rendezvous_barrier, 556 rm_cleanIPI, 557 smp_no_rendezvous_barrier, 558 rm); 559 560 #else 561 rm_cleanIPI(rm); 562 #endif 563 564 mtx_lock_spin(&rm_spinlock); 565 while ((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { 566 ts = turnstile_trywait(&rm->lock_object); 567 prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; 568 mtx_unlock_spin(&rm_spinlock); 569 turnstile_wait(ts, prio->rmp_thread, 570 TS_EXCLUSIVE_QUEUE); 571 mtx_lock_spin(&rm_spinlock); 572 } 573 mtx_unlock_spin(&rm_spinlock); 574 } 575 } 576 577 void 578 _rm_wunlock(struct rmlock *rm) 579 { 580 581 if (rm->lock_object.lo_flags & LO_SLEEPABLE) 582 sx_xunlock(&rm->rm_lock_sx); 583 else 584 mtx_unlock(&rm->rm_lock_mtx); 585 } 586 587 #if LOCK_DEBUG > 0 588 589 void 590 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 591 { 592 593 if (SCHEDULER_STOPPED()) 594 return; 595 596 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 597 ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d", 598 curthread, rm->lock_object.lo_name, file, line)); 599 KASSERT(!rm_destroyed(rm), 600 ("rm_wlock() of destroyed rmlock @ %s:%d", file, line)); 601 _rm_assert(rm, RA_UNLOCKED, file, line); 602 603 WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, 604 file, line, NULL); 605 606 _rm_wlock(rm); 607 608 LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); 609 WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 610 TD_LOCKS_INC(curthread); 611 } 612 613 void 614 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 615 { 616 617 if (SCHEDULER_STOPPED()) 618 return; 619 620 KASSERT(!rm_destroyed(rm), 621 ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line)); 622 _rm_assert(rm, RA_WLOCKED, file, line); 623 WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); 624 LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); 625 _rm_wunlock(rm); 626 TD_LOCKS_DEC(curthread); 627 } 628 629 int 630 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 631 int trylock, const char *file, int line) 632 { 633 634 if (SCHEDULER_STOPPED()) 635 return (1); 636 637 #ifdef INVARIANTS 638 if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) { 639 critical_enter(); 640 KASSERT(rm_trackers_present(get_pcpu(), rm, 641 curthread) == 0, 642 ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n", 643 rm->lock_object.lo_name, file, line)); 644 critical_exit(); 645 } 646 #endif 647 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 648 ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d", 649 curthread, rm->lock_object.lo_name, file, line)); 650 KASSERT(!rm_destroyed(rm), 651 ("rm_rlock() of destroyed rmlock @ %s:%d", file, line)); 652 if (!trylock) { 653 KASSERT(!rm_wowned(rm), 654 ("rm_rlock: wlock already held for %s @ %s:%d", 655 rm->lock_object.lo_name, file, line)); 656 WITNESS_CHECKORDER(&rm->lock_object, 657 LOP_NEWORDER | LOP_NOSLEEP, file, line, NULL); 658 } 659 660 if (_rm_rlock(rm, tracker, trylock)) { 661 if (trylock) 662 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file, 663 line); 664 else 665 LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, 666 line); 667 WITNESS_LOCK(&rm->lock_object, LOP_NOSLEEP, file, line); 668 TD_LOCKS_INC(curthread); 669 return (1); 670 } else if (trylock) 671 LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line); 672 673 return (0); 674 } 675 676 void 677 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 678 const char *file, int line) 679 { 680 681 if (SCHEDULER_STOPPED()) 682 return; 683 684 KASSERT(!rm_destroyed(rm), 685 ("rm_runlock() of destroyed rmlock @ %s:%d", file, line)); 686 _rm_assert(rm, RA_RLOCKED, file, line); 687 WITNESS_UNLOCK(&rm->lock_object, 0, file, line); 688 LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); 689 _rm_runlock(rm, tracker); 690 TD_LOCKS_DEC(curthread); 691 } 692 693 #else 694 695 /* 696 * Just strip out file and line arguments if no lock debugging is enabled in 697 * the kernel - we are called from a kernel module. 698 */ 699 void 700 _rm_wlock_debug(struct rmlock *rm, const char *file, int line) 701 { 702 703 _rm_wlock(rm); 704 } 705 706 void 707 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) 708 { 709 710 _rm_wunlock(rm); 711 } 712 713 int 714 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 715 int trylock, const char *file, int line) 716 { 717 718 return _rm_rlock(rm, tracker, trylock); 719 } 720 721 void 722 _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, 723 const char *file, int line) 724 { 725 726 _rm_runlock(rm, tracker); 727 } 728 729 #endif 730 731 #ifdef INVARIANT_SUPPORT 732 #ifndef INVARIANTS 733 #undef _rm_assert 734 #endif 735 736 /* 737 * Note that this does not need to use witness_assert() for read lock 738 * assertions since an exact count of read locks held by this thread 739 * is computable. 740 */ 741 void 742 _rm_assert(const struct rmlock *rm, int what, const char *file, int line) 743 { 744 int count; 745 746 if (SCHEDULER_STOPPED()) 747 return; 748 switch (what) { 749 case RA_LOCKED: 750 case RA_LOCKED | RA_RECURSED: 751 case RA_LOCKED | RA_NOTRECURSED: 752 case RA_RLOCKED: 753 case RA_RLOCKED | RA_RECURSED: 754 case RA_RLOCKED | RA_NOTRECURSED: 755 /* 756 * Handle the write-locked case. Unlike other 757 * primitives, writers can never recurse. 758 */ 759 if (rm_wowned(rm)) { 760 if (what & RA_RLOCKED) 761 panic("Lock %s exclusively locked @ %s:%d\n", 762 rm->lock_object.lo_name, file, line); 763 if (what & RA_RECURSED) 764 panic("Lock %s not recursed @ %s:%d\n", 765 rm->lock_object.lo_name, file, line); 766 break; 767 } 768 769 critical_enter(); 770 count = rm_trackers_present(get_pcpu(), rm, curthread); 771 critical_exit(); 772 773 if (count == 0) 774 panic("Lock %s not %slocked @ %s:%d\n", 775 rm->lock_object.lo_name, (what & RA_RLOCKED) ? 776 "read " : "", file, line); 777 if (count > 1) { 778 if (what & RA_NOTRECURSED) 779 panic("Lock %s recursed @ %s:%d\n", 780 rm->lock_object.lo_name, file, line); 781 } else if (what & RA_RECURSED) 782 panic("Lock %s not recursed @ %s:%d\n", 783 rm->lock_object.lo_name, file, line); 784 break; 785 case RA_WLOCKED: 786 if (!rm_wowned(rm)) 787 panic("Lock %s not exclusively locked @ %s:%d\n", 788 rm->lock_object.lo_name, file, line); 789 break; 790 case RA_UNLOCKED: 791 if (rm_wowned(rm)) 792 panic("Lock %s exclusively locked @ %s:%d\n", 793 rm->lock_object.lo_name, file, line); 794 795 critical_enter(); 796 count = rm_trackers_present(get_pcpu(), rm, curthread); 797 critical_exit(); 798 799 if (count != 0) 800 panic("Lock %s read locked @ %s:%d\n", 801 rm->lock_object.lo_name, file, line); 802 break; 803 default: 804 panic("Unknown rm lock assertion: %d @ %s:%d", what, file, 805 line); 806 } 807 } 808 #endif /* INVARIANT_SUPPORT */ 809 810 #ifdef DDB 811 static void 812 print_tracker(struct rm_priotracker *tr) 813 { 814 struct thread *td; 815 816 td = tr->rmp_thread; 817 db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid, 818 td->td_proc->p_pid, td->td_name); 819 if (tr->rmp_flags & RMPF_ONQUEUE) { 820 db_printf("ONQUEUE"); 821 if (tr->rmp_flags & RMPF_SIGNAL) 822 db_printf(",SIGNAL"); 823 } else 824 db_printf("0"); 825 db_printf("}\n"); 826 } 827 828 static void 829 db_show_rm(const struct lock_object *lock) 830 { 831 struct rm_priotracker *tr; 832 struct rm_queue *queue; 833 const struct rmlock *rm; 834 struct lock_class *lc; 835 struct pcpu *pc; 836 837 rm = (const struct rmlock *)lock; 838 db_printf(" writecpus: "); 839 ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus)); 840 db_printf("\n"); 841 db_printf(" per-CPU readers:\n"); 842 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) 843 for (queue = pc->pc_rm_queue.rmq_next; 844 queue != &pc->pc_rm_queue; queue = queue->rmq_next) { 845 tr = (struct rm_priotracker *)queue; 846 if (tr->rmp_rmlock == rm) 847 print_tracker(tr); 848 } 849 db_printf(" active readers:\n"); 850 LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry) 851 print_tracker(tr); 852 lc = LOCK_CLASS(&rm->rm_wlock_object); 853 db_printf("Backing write-lock (%s):\n", lc->lc_name); 854 lc->lc_ddb_show(&rm->rm_wlock_object); 855 } 856 #endif 857 858 /* 859 * Read-mostly sleepable locks. 860 * 861 * These primitives allow both readers and writers to sleep. However, neither 862 * readers nor writers are tracked and subsequently there is no priority 863 * propagation. 864 * 865 * They are intended to be only used when write-locking is almost never needed 866 * (e.g., they can guard against unloading a kernel module) while read-locking 867 * happens all the time. 868 * 869 * Concurrent writers take turns taking the lock while going off cpu. If this is 870 * of concern for your usecase, this is not the right primitive. 871 * 872 * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are 873 * inserted to prevert reordering of generated code. Execution ordering is 874 * provided with the use of an IPI handler. 875 */ 876 877 void 878 rms_init(struct rmslock *rms, const char *name) 879 { 880 881 rms->writers = 0; 882 rms->readers = 0; 883 mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW); 884 rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO); 885 rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO); 886 } 887 888 void 889 rms_destroy(struct rmslock *rms) 890 { 891 892 MPASS(rms->writers == 0); 893 MPASS(rms->readers == 0); 894 mtx_destroy(&rms->mtx); 895 uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu); 896 uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx); 897 } 898 899 static void __noinline 900 rms_rlock_fallback(struct rmslock *rms) 901 { 902 903 (*zpcpu_get(rms->readers_influx)) = 0; 904 critical_exit(); 905 906 mtx_lock(&rms->mtx); 907 MPASS(*zpcpu_get(rms->readers_pcpu) == 0); 908 while (rms->writers > 0) 909 msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0); 910 (*zpcpu_get(rms->readers_pcpu))++; 911 mtx_unlock(&rms->mtx); 912 } 913 914 void 915 rms_rlock(struct rmslock *rms) 916 { 917 int *influx; 918 919 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 920 921 critical_enter(); 922 influx = zpcpu_get(rms->readers_influx); 923 __compiler_membar(); 924 *influx = 1; 925 __compiler_membar(); 926 if (__predict_false(rms->writers > 0)) { 927 rms_rlock_fallback(rms); 928 return; 929 } 930 __compiler_membar(); 931 (*zpcpu_get(rms->readers_pcpu))++; 932 __compiler_membar(); 933 *influx = 0; 934 critical_exit(); 935 } 936 937 static void __noinline 938 rms_runlock_fallback(struct rmslock *rms) 939 { 940 941 (*zpcpu_get(rms->readers_influx)) = 0; 942 critical_exit(); 943 944 mtx_lock(&rms->mtx); 945 MPASS(*zpcpu_get(rms->readers_pcpu) == 0); 946 MPASS(rms->writers > 0); 947 MPASS(rms->readers > 0); 948 rms->readers--; 949 if (rms->readers == 0) 950 wakeup_one(&rms->writers); 951 mtx_unlock(&rms->mtx); 952 } 953 954 void 955 rms_runlock(struct rmslock *rms) 956 { 957 int *influx; 958 959 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 960 961 critical_enter(); 962 influx = zpcpu_get(rms->readers_influx); 963 __compiler_membar(); 964 *influx = 1; 965 __compiler_membar(); 966 if (__predict_false(rms->writers > 0)) { 967 rms_runlock_fallback(rms); 968 return; 969 } 970 __compiler_membar(); 971 (*zpcpu_get(rms->readers_pcpu))--; 972 __compiler_membar(); 973 *influx = 0; 974 critical_exit(); 975 } 976 977 struct rmslock_ipi { 978 struct rmslock *rms; 979 cpuset_t signal; 980 }; 981 982 static void 983 rms_wlock_IPI(void *arg) 984 { 985 struct rmslock_ipi *rmsipi; 986 struct rmslock *rms; 987 int readers; 988 989 rmsipi = arg; 990 rms = rmsipi->rms; 991 992 if (*zpcpu_get(rms->readers_influx)) 993 return; 994 readers = zpcpu_replace(rms->readers_pcpu, 0); 995 if (readers != 0) 996 atomic_add_int(&rms->readers, readers); 997 CPU_CLR_ATOMIC(curcpu, &rmsipi->signal); 998 } 999 1000 static void 1001 rms_wlock_switch(struct rmslock *rms) 1002 { 1003 struct rmslock_ipi rmsipi; 1004 int *in_op; 1005 int cpu; 1006 1007 MPASS(rms->readers == 0); 1008 MPASS(rms->writers == 1); 1009 1010 rmsipi.rms = rms; 1011 1012 /* 1013 * Publishes rms->writers. rlock and runlock will get this ordered 1014 * via IPI in the worst case. 1015 */ 1016 atomic_thread_fence_rel(); 1017 1018 /* 1019 * Collect reader counts from all CPUs using an IPI. The handler can 1020 * find itself running while the interrupted CPU was doing either 1021 * rlock or runlock in which case it will fail. 1022 * 1023 * Successful attempts clear the cpu id in the bitmap. 1024 * 1025 * In case of failure we observe all failing CPUs not executing there to 1026 * determine when to make the next attempt. Note that threads having 1027 * the var set have preemption disabled. Setting of readers_influx 1028 * only uses compiler barriers making these loads unreliable, which is 1029 * fine -- the IPI handler will always see the correct result. 1030 * 1031 * We retry until all counts are collected. Forward progress is 1032 * guaranteed by that fact that the total number of threads which can 1033 * be caught like this is finite and they all are going to block on 1034 * their own. 1035 */ 1036 CPU_COPY(&all_cpus, &rmsipi.signal); 1037 for (;;) { 1038 smp_rendezvous_cpus( 1039 rmsipi.signal, 1040 smp_no_rendezvous_barrier, 1041 rms_wlock_IPI, 1042 smp_no_rendezvous_barrier, 1043 &rmsipi); 1044 1045 if (CPU_EMPTY(&rmsipi.signal)) 1046 break; 1047 1048 CPU_FOREACH(cpu) { 1049 if (!CPU_ISSET(cpu, &rmsipi.signal)) 1050 continue; 1051 in_op = zpcpu_get_cpu(rms->readers_influx, cpu); 1052 while (atomic_load_int(in_op)) 1053 cpu_spinwait(); 1054 } 1055 } 1056 } 1057 1058 void 1059 rms_wlock(struct rmslock *rms) 1060 { 1061 1062 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 1063 1064 mtx_lock(&rms->mtx); 1065 rms->writers++; 1066 if (rms->writers > 1) { 1067 msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP, 1068 mtx_name(&rms->mtx), 0); 1069 MPASS(rms->readers == 0); 1070 return; 1071 } 1072 1073 rms_wlock_switch(rms); 1074 1075 if (rms->readers > 0) 1076 msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP, 1077 mtx_name(&rms->mtx), 0); 1078 else 1079 mtx_unlock(&rms->mtx); 1080 MPASS(rms->readers == 0); 1081 } 1082 1083 void 1084 rms_wunlock(struct rmslock *rms) 1085 { 1086 1087 mtx_lock(&rms->mtx); 1088 MPASS(rms->writers >= 1); 1089 MPASS(rms->readers == 0); 1090 rms->writers--; 1091 if (rms->writers > 0) 1092 wakeup_one(&rms->writers); 1093 else 1094 wakeup(&rms->readers); 1095 mtx_unlock(&rms->mtx); 1096 } 1097