/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock * has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

#ifdef ADAPTIVE_RWLOCKS
static int __read_frequently rowner_retries = 10;
static int __read_frequently rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");

static struct lock_delay_config __read_frequently rw_delay;

SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
    0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
    0, "");

LOCK_DELAY_SYSINIT_DEFAULT(rw_delay);
#endif

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */

#define	lv_rw_wowner(v)						\
	((v) & RW_LOCK_READ ? NULL :				\
	 (struct thread *)RW_OWNER((v)))

#define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))

/*
 * Returns true if a write owner is recursed.  Write ownership is not assured
 * here and should be checked beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?
	    (RW_READERS(x) != 0) : (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE | RW_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	if (opts & RW_NEW)
		flags |= LO_NEW;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t tid, v;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	v = RW_UNLOCKED;
	if (!_rw_write_lock_fetch(rw, &v, tid))
		_rw_wlock_hard(rw, v, tid, file, line);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
		    0, 0, file, line, LOCKSTAT_WRITER);

	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct thread *td;
	uintptr_t tid, v;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED_TD(td))
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	rval = 1;
	recursed = false;
	v = RW_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
			break;
		if (v == RW_UNLOCKED)
			continue;
		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
			rw->rw_recurse++;
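			/*
			 * Note (added for clarity): setting the
			 * RW_LOCK_WRITER_RECURSED bit makes the lock word
			 * differ from the bare tid, so the fast unlock path
			 * cannot release the lock and __rw_wunlock_hard()
			 * runs instead to decrement rw_recurse.
			 */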
			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}
	return (rval);
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);

#ifdef LOCK_PROFILING
	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
#else
	__rw_wunlock(rw, curthread, file, line);
#endif

	TD_LOCKS_DEC(curthread);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
#define	RW_CAN_READ(td, _rw)						\
    (((_rw) & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==\
    RW_LOCK_READ || ((td)->td_rw_rlocks && (_rw) & RW_LOCK_READ))

static bool __always_inline
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp,
    const char *file, int line)
{

	/*
	 * Handle the easy case.  If no other thread has a write
	 * lock, then try to bump up the count of read locks.  Note
	 * that we have to preserve the current state of the
	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
	 * read lock, then rw_lock must have changed, so restart
	 * the loop.  Note that this handles the case of a
	 * completely unlocked rwlock since such a lock is encoded
	 * as a read lock with no waiters.
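	 * (For reference, RW_UNLOCKED is defined as RW_READERS_LOCK(0),
	 * i.e. the RW_LOCK_READ bit with a reader count of zero.)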
	 */
	while (RW_CAN_READ(td, *vp)) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
		    *vp + RW_ONE_READER)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR4(KTR_LOCK,
				    "%s: %p succeed %p -> %p", __func__,
				    rw, (void *)*vp,
				    (void *)(*vp + RW_ONE_READER));
			td->td_rw_rlocks++;
			return (true);
		}
	}
	return (false);
}

static void __noinline
__rw_rlock_hard(volatile uintptr_t *c, struct thread *td, uintptr_t v,
    const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i, n;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state;
	int doing_lockprof;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif
	rw = rwlock2rw(c);

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object,
	    &contested, &waittime);

#ifdef LOCK_PROFILING
	doing_lockprof = 1;
	state = v;
#elif defined(KDTRACE_HOOKS)
	doing_lockprof = lockstat_enabled;
	if (__predict_false(doing_lockprof)) {
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif

	for (;;) {
		if (__rw_rlock_try(rw, td, &v, file, line))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				do {
					lock_delay(&lda);
					v = RW_READ_VALUE(rw);
					owner = lv_rw_wowner(v);
				} while (owner != NULL && TD_IS_RUNNING(owner));
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i += n) {
				n = RW_READERS(v);
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(td, v))
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += rowner_loops - i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
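		 * (Added note: turnstile_trywait() also locks the turnstile
		 * chain for this lock object, which is what keeps a wakeup
		 * from being lost between setting the waiters flag and
		 * calling turnstile_wait() below.)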
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = RW_READ_VALUE(rw);
		if (RW_CAN_READ(td, v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(td, v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				v = RW_READ_VALUE(rw);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
		v = RW_READ_VALUE(rw);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_READER);
}

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct thread *td;
	uintptr_t v;

	td = curthread;
	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
	    !TD_IS_IDLETHREAD(td),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    td, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != td,
	    ("rw_rlock: wlock already held for %s @ %s:%d",
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	v = RW_READ_VALUE(rw);
	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__acquire) ||
	    !__rw_rlock_try(rw, td, &v, file, line)))
		__rw_rlock_hard(c, td, v, file, line);

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	TD_LOCKS_INC(curthread);
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	x = rw->rw_lock;
	for (;;) {
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

static bool __always_inline
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
{

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		if (RW_READERS(*vp) > 1) {
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    *vp - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)*vp,
					    (void *)(*vp - RW_ONE_READER));
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
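		 * (Added note: at this point this thread holds the only
		 * read reference, so with no waiter bits set the lock word
		 * should be RW_READERS_LOCK(1), possibly with
		 * RW_LOCK_WRITE_SPINNER set, which is what the MPASS below
		 * asserts.)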
		 */
		if (!(*vp & RW_LOCK_WAITERS)) {
			MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
__rw_runlock_hard(volatile uintptr_t *c, struct thread *td, uintptr_t v,
    const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t x, queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	for (;;) {
		if (__rw_runlock_try(rw, td, &v))
			break;

		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock, leaving it in an unlocked state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			v = RW_READ_VALUE(rw);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		td->td_rw_rlocks--;
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct thread *td;
	uintptr_t v;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	td = curthread;
	v = RW_READ_VALUE(rw);

	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__release) ||
	    !__rw_runlock_try(rw, td, &v)))
		__rw_runlock_hard(c, td, v, file, line);

	TD_LOCKS_DEC(curthread);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.
 * This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
    const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i, n;
#endif
	uintptr_t x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state;
	int doing_lockprof;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif
	rw = rwlock2rw(c);
	if (__predict_false(v == RW_UNLOCKED))
		v = RW_READ_VALUE(rw);

	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object,
	    &contested, &waittime);

#ifdef LOCK_PROFILING
	doing_lockprof = 1;
	state = v;
#elif defined(KDTRACE_HOOKS)
	doing_lockprof = lockstat_enabled;
	if (__predict_false(doing_lockprof)) {
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif

	for (;;) {
		if (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				break;
			continue;
		}
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
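		 * (Added note: lock_delay() applies the backoff configured
		 * in rw_delay, which is tunable through the
		 * debug.rwlock.delay_base and debug.rwlock.delay_max
		 * sysctls declared near the top of this file.)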
		 */
		owner = lv_rw_wowner(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			do {
				lock_delay(&lda);
				v = RW_READ_VALUE(rw);
				owner = lv_rw_wowner(v);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i += n) {
				n = RW_READERS(v);
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if ((v & RW_LOCK_WRITE_SPINNER) == 0)
					break;
			}
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiter flags on this rwlock.  If the lock has
		 * been released and only waiter/spinner bits remain, simply
		 * try to acquire it while preserving any waiter flags; if
		 * waiters are pending, also claim ownership of the
		 * turnstile.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			v = RW_READ_VALUE(rw);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				v = RW_READ_VALUE(rw);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
		v = RW_READ_VALUE(rw);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
}

/*
 * This function is called if lockstat is active or the first try at releasing
 * a write lock failed.  The latter means that the lock is recursed or one of
 * the two waiter bits is set, indicating that at least one thread is waiting
 * on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
    int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t v, setv;
	int queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);
	v = RW_READ_VALUE(rw);
	if (v & RW_LOCK_WRITER_RECURSED) {
		if (--(rw->rw_recurse) == 0)
			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
	if (v == tid && _rw_write_unlock(rw, tid))
		return;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);

	/*
	 * Use the same algorithm as sx locks for now: prefer waking up the
	 * write waiters if we have any over readers.  This is probably not
	 * ideal.
	 *
	 * 'setv' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * writers but leave the RW_LOCK_READ_WAITERS flag set, so the
	 * readers block again until the new lock holder(s) release the
	 * lock.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	setv = RW_UNLOCKED;
	v = RW_READ_VALUE(rw);
	queue = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	atomic_store_rel_ptr(&rw->rw_lock, setv);

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	turnstile_broadcast(ts, queue);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(rw__upgrade, rw);
	}
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
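 *
 * A typical caller pattern might look like the following (purely
 * illustrative, not taken from this file; "sc" stands for a hypothetical
 * consumer's softc):
 *
 *	rw_wlock(&sc->sc_lock);
 *	... modify protected state ...
 *	rw_downgrade(&sc->sc_lock);
 *	... keep reading the state just written ...
 *	rw_runlock(&sc->sc_lock);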
 */
void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(rw__downgrade, rw);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (panicstr != NULL)
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
1337 "read " : "", file, line); 1338 1339 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { 1340 if (rw_recursed(rw)) { 1341 if (what & RA_NOTRECURSED) 1342 panic("Lock %s recursed @ %s:%d\n", 1343 rw->lock_object.lo_name, file, 1344 line); 1345 } else if (what & RA_RECURSED) 1346 panic("Lock %s not recursed @ %s:%d\n", 1347 rw->lock_object.lo_name, file, line); 1348 } 1349 #endif 1350 break; 1351 case RA_WLOCKED: 1352 case RA_WLOCKED | RA_RECURSED: 1353 case RA_WLOCKED | RA_NOTRECURSED: 1354 if (rw_wowner(rw) != curthread) 1355 panic("Lock %s not exclusively locked @ %s:%d\n", 1356 rw->lock_object.lo_name, file, line); 1357 if (rw_recursed(rw)) { 1358 if (what & RA_NOTRECURSED) 1359 panic("Lock %s recursed @ %s:%d\n", 1360 rw->lock_object.lo_name, file, line); 1361 } else if (what & RA_RECURSED) 1362 panic("Lock %s not recursed @ %s:%d\n", 1363 rw->lock_object.lo_name, file, line); 1364 break; 1365 case RA_UNLOCKED: 1366 #ifdef WITNESS 1367 witness_assert(&rw->lock_object, what, file, line); 1368 #else 1369 /* 1370 * If we hold a write lock fail. We can't reliably check 1371 * to see if we hold a read lock or not. 1372 */ 1373 if (rw_wowner(rw) == curthread) 1374 panic("Lock %s exclusively locked @ %s:%d\n", 1375 rw->lock_object.lo_name, file, line); 1376 #endif 1377 break; 1378 default: 1379 panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 1380 line); 1381 } 1382 } 1383 #endif /* INVARIANT_SUPPORT */ 1384 1385 #ifdef DDB 1386 void 1387 db_show_rwlock(const struct lock_object *lock) 1388 { 1389 const struct rwlock *rw; 1390 struct thread *td; 1391 1392 rw = (const struct rwlock *)lock; 1393 1394 db_printf(" state: "); 1395 if (rw->rw_lock == RW_UNLOCKED) 1396 db_printf("UNLOCKED\n"); 1397 else if (rw->rw_lock == RW_DESTROYED) { 1398 db_printf("DESTROYED\n"); 1399 return; 1400 } else if (rw->rw_lock & RW_LOCK_READ) 1401 db_printf("RLOCK: %ju locks\n", 1402 (uintmax_t)(RW_READERS(rw->rw_lock))); 1403 else { 1404 td = rw_wowner(rw); 1405 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 1406 td->td_tid, td->td_proc->p_pid, td->td_name); 1407 if (rw_recursed(rw)) 1408 db_printf(" recursed: %u\n", rw->rw_recurse); 1409 } 1410 db_printf(" waiters: "); 1411 switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 1412 case RW_LOCK_READ_WAITERS: 1413 db_printf("readers\n"); 1414 break; 1415 case RW_LOCK_WRITE_WAITERS: 1416 db_printf("writers\n"); 1417 break; 1418 case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 1419 db_printf("readers and writers\n"); 1420 break; 1421 default: 1422 db_printf("none\n"); 1423 break; 1424 } 1425 } 1426 1427 #endif 1428