/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ddb.h" 36 #include "opt_hwpmc_hooks.h" 37 #include "opt_no_adaptive_rwlocks.h" 38 39 #include <sys/param.h> 40 #include <sys/kdb.h> 41 #include <sys/ktr.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/rwlock.h> 47 #include <sys/sched.h> 48 #include <sys/smp.h> 49 #include <sys/sysctl.h> 50 #include <sys/systm.h> 51 #include <sys/turnstile.h> 52 53 #include <machine/cpu.h> 54 55 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) 56 #define ADAPTIVE_RWLOCKS 57 #endif 58 59 #ifdef HWPMC_HOOKS 60 #include <sys/pmckern.h> 61 PMC_SOFT_DECLARE( , , lock, failed); 62 #endif 63 64 /* 65 * Return the rwlock address when the lock cookie address is provided. 66 * This functionality assumes that struct rwlock* have a member named rw_lock. 67 */ 68 #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) 69 70 #ifdef DDB 71 #include <ddb/ddb.h> 72 73 static void db_show_rwlock(const struct lock_object *lock); 74 #endif 75 static void assert_rw(const struct lock_object *lock, int what); 76 static void lock_rw(struct lock_object *lock, uintptr_t how); 77 #ifdef KDTRACE_HOOKS 78 static int owner_rw(const struct lock_object *lock, struct thread **owner); 79 #endif 80 static uintptr_t unlock_rw(struct lock_object *lock); 81 82 struct lock_class lock_class_rw = { 83 .lc_name = "rw", 84 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, 85 .lc_assert = assert_rw, 86 #ifdef DDB 87 .lc_ddb_show = db_show_rwlock, 88 #endif 89 .lc_lock = lock_rw, 90 .lc_unlock = unlock_rw, 91 #ifdef KDTRACE_HOOKS 92 .lc_owner = owner_rw, 93 #endif 94 }; 95 96 #ifdef ADAPTIVE_RWLOCKS 97 #ifdef RWLOCK_CUSTOM_BACKOFF 98 static u_short __read_frequently rowner_retries; 99 static u_short __read_frequently rowner_loops; 100 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, 101 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 102 "rwlock debugging"); 103 
SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); 104 SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); 105 106 static struct lock_delay_config __read_frequently rw_delay; 107 108 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base, 109 0, ""); 110 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, 111 0, ""); 112 113 static void 114 rw_lock_delay_init(void *arg __unused) 115 { 116 117 lock_delay_default_init(&rw_delay); 118 rowner_retries = 10; 119 rowner_loops = max(10000, rw_delay.max); 120 } 121 LOCK_DELAY_SYSINIT(rw_lock_delay_init); 122 #else 123 #define rw_delay locks_delay 124 #define rowner_retries locks_delay_retries 125 #define rowner_loops locks_delay_loops 126 #endif 127 #endif 128 129 /* 130 * Return a pointer to the owning thread if the lock is write-locked or 131 * NULL if the lock is unlocked or read-locked. 132 */ 133 134 #define lv_rw_wowner(v) \ 135 ((v) & RW_LOCK_READ ? NULL : \ 136 (struct thread *)RW_OWNER((v))) 137 138 #define rw_wowner(rw) lv_rw_wowner(RW_READ_VALUE(rw)) 139 140 /* 141 * Returns if a write owner is recursed. Write ownership is not assured 142 * here and should be previously checked. 143 */ 144 #define rw_recursed(rw) ((rw)->rw_recurse != 0) 145 146 /* 147 * Return true if curthread helds the lock. 148 */ 149 #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) 150 151 /* 152 * Return a pointer to the owning thread for this lock who should receive 153 * any priority lent by threads that block on this lock. Currently this 154 * is identical to rw_wowner(). 
155 */ 156 #define rw_owner(rw) rw_wowner(rw) 157 158 #ifndef INVARIANTS 159 #define __rw_assert(c, what, file, line) 160 #endif 161 162 void 163 assert_rw(const struct lock_object *lock, int what) 164 { 165 166 rw_assert((const struct rwlock *)lock, what); 167 } 168 169 void 170 lock_rw(struct lock_object *lock, uintptr_t how) 171 { 172 struct rwlock *rw; 173 174 rw = (struct rwlock *)lock; 175 if (how) 176 rw_rlock(rw); 177 else 178 rw_wlock(rw); 179 } 180 181 uintptr_t 182 unlock_rw(struct lock_object *lock) 183 { 184 struct rwlock *rw; 185 186 rw = (struct rwlock *)lock; 187 rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); 188 if (rw->rw_lock & RW_LOCK_READ) { 189 rw_runlock(rw); 190 return (1); 191 } else { 192 rw_wunlock(rw); 193 return (0); 194 } 195 } 196 197 #ifdef KDTRACE_HOOKS 198 int 199 owner_rw(const struct lock_object *lock, struct thread **owner) 200 { 201 const struct rwlock *rw = (const struct rwlock *)lock; 202 uintptr_t x = rw->rw_lock; 203 204 *owner = rw_wowner(rw); 205 return ((x & RW_LOCK_READ) != 0 ? 
(RW_READERS(x) != 0) : 206 (*owner != NULL)); 207 } 208 #endif 209 210 void 211 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) 212 { 213 struct rwlock *rw; 214 int flags; 215 216 rw = rwlock2rw(c); 217 218 MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | 219 RW_RECURSE | RW_NEW)) == 0); 220 ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, 221 ("%s: rw_lock not aligned for %s: %p", __func__, name, 222 &rw->rw_lock)); 223 224 flags = LO_UPGRADABLE; 225 if (opts & RW_DUPOK) 226 flags |= LO_DUPOK; 227 if (opts & RW_NOPROFILE) 228 flags |= LO_NOPROFILE; 229 if (!(opts & RW_NOWITNESS)) 230 flags |= LO_WITNESS; 231 if (opts & RW_RECURSE) 232 flags |= LO_RECURSABLE; 233 if (opts & RW_QUIET) 234 flags |= LO_QUIET; 235 if (opts & RW_NEW) 236 flags |= LO_NEW; 237 238 lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); 239 rw->rw_lock = RW_UNLOCKED; 240 rw->rw_recurse = 0; 241 } 242 243 void 244 _rw_destroy(volatile uintptr_t *c) 245 { 246 struct rwlock *rw; 247 248 rw = rwlock2rw(c); 249 250 KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); 251 KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); 252 rw->rw_lock = RW_DESTROYED; 253 lock_destroy(&rw->lock_object); 254 } 255 256 void 257 rw_sysinit(void *arg) 258 { 259 struct rw_args *args; 260 261 args = arg; 262 rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, 263 args->ra_flags); 264 } 265 266 int 267 _rw_wowned(const volatile uintptr_t *c) 268 { 269 270 return (rw_wowner(rwlock2rw(c)) == curthread); 271 } 272 273 void 274 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) 275 { 276 struct rwlock *rw; 277 uintptr_t tid, v; 278 279 rw = rwlock2rw(c); 280 281 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || 282 !TD_IS_IDLETHREAD(curthread), 283 ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", 284 curthread, rw->lock_object.lo_name, file, line)); 285 KASSERT(rw->rw_lock != RW_DESTROYED, 286 ("rw_wlock() of destroyed rwlock @ 
%s:%d", file, line)); 287 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 288 line, NULL); 289 tid = (uintptr_t)curthread; 290 v = RW_UNLOCKED; 291 if (!_rw_write_lock_fetch(rw, &v, tid)) 292 _rw_wlock_hard(rw, v, file, line); 293 else 294 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 295 0, 0, file, line, LOCKSTAT_WRITER); 296 297 LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); 298 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 299 TD_LOCKS_INC(curthread); 300 } 301 302 int 303 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 304 { 305 struct thread *td; 306 uintptr_t tid, v; 307 int rval; 308 bool recursed; 309 310 td = curthread; 311 tid = (uintptr_t)td; 312 if (SCHEDULER_STOPPED_TD(td)) 313 return (1); 314 315 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), 316 ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", 317 curthread, rw->lock_object.lo_name, file, line)); 318 KASSERT(rw->rw_lock != RW_DESTROYED, 319 ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); 320 321 rval = 1; 322 recursed = false; 323 v = RW_UNLOCKED; 324 for (;;) { 325 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) 326 break; 327 if (v == RW_UNLOCKED) 328 continue; 329 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) { 330 rw->rw_recurse++; 331 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 332 break; 333 } 334 rval = 0; 335 break; 336 } 337 338 LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); 339 if (rval) { 340 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 341 file, line); 342 if (!recursed) 343 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, 344 rw, 0, 0, file, line, LOCKSTAT_WRITER); 345 TD_LOCKS_INC(curthread); 346 } 347 return (rval); 348 } 349 350 int 351 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) 352 { 353 struct rwlock *rw; 354 355 rw = rwlock2rw(c); 356 return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG)); 
357 } 358 359 void 360 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) 361 { 362 struct rwlock *rw; 363 364 rw = rwlock2rw(c); 365 366 KASSERT(rw->rw_lock != RW_DESTROYED, 367 ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); 368 __rw_assert(c, RA_WLOCKED, file, line); 369 WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 370 LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, 371 line); 372 373 #ifdef LOCK_PROFILING 374 _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line); 375 #else 376 __rw_wunlock(rw, curthread, file, line); 377 #endif 378 379 TD_LOCKS_DEC(curthread); 380 } 381 382 /* 383 * Determines whether a new reader can acquire a lock. Succeeds if the 384 * reader already owns a read lock and the lock is locked for read to 385 * prevent deadlock from reader recursion. Also succeeds if the lock 386 * is unlocked and has no writer waiters or spinners. Failing otherwise 387 * prioritizes writers before readers. 388 */ 389 static bool __always_inline 390 __rw_can_read(struct thread *td, uintptr_t v, bool fp) 391 { 392 393 if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) 394 == RW_LOCK_READ) 395 return (true); 396 if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ)) 397 return (true); 398 return (false); 399 } 400 401 static bool __always_inline 402 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp 403 LOCK_FILE_LINE_ARG_DEF) 404 { 405 406 /* 407 * Handle the easy case. If no other thread has a write 408 * lock, then try to bump up the count of read locks. Note 409 * that we have to preserve the current state of the 410 * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 411 * read lock, then rw_lock must have changed, so restart 412 * the loop. Note that this handles the case of a 413 * completely unlocked rwlock since such a lock is encoded 414 * as a read lock with no waiters. 
415 */ 416 while (__rw_can_read(td, *vp, fp)) { 417 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp, 418 *vp + RW_ONE_READER)) { 419 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 420 CTR4(KTR_LOCK, 421 "%s: %p succeed %p -> %p", __func__, 422 rw, (void *)*vp, 423 (void *)(*vp + RW_ONE_READER)); 424 td->td_rw_rlocks++; 425 return (true); 426 } 427 } 428 return (false); 429 } 430 431 static void __noinline 432 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v 433 LOCK_FILE_LINE_ARG_DEF) 434 { 435 struct turnstile *ts; 436 struct thread *owner; 437 #ifdef ADAPTIVE_RWLOCKS 438 int spintries = 0; 439 int i, n; 440 #endif 441 #ifdef LOCK_PROFILING 442 uint64_t waittime = 0; 443 int contested = 0; 444 #endif 445 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) 446 struct lock_delay_arg lda; 447 #endif 448 #ifdef KDTRACE_HOOKS 449 u_int sleep_cnt = 0; 450 int64_t sleep_time = 0; 451 int64_t all_time = 0; 452 #endif 453 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 454 uintptr_t state = 0; 455 int doing_lockprof = 0; 456 #endif 457 458 #ifdef KDTRACE_HOOKS 459 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { 460 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) 461 goto out_lockstat; 462 doing_lockprof = 1; 463 all_time -= lockstat_nsecs(&rw->lock_object); 464 state = v; 465 } 466 #endif 467 #ifdef LOCK_PROFILING 468 doing_lockprof = 1; 469 state = v; 470 #endif 471 472 if (SCHEDULER_STOPPED()) 473 return; 474 475 #if defined(ADAPTIVE_RWLOCKS) 476 lock_delay_arg_init(&lda, &rw_delay); 477 #elif defined(KDTRACE_HOOKS) 478 lock_delay_arg_init_noadapt(&lda); 479 #endif 480 481 #ifdef HWPMC_HOOKS 482 PMC_SOFT_CALL( , , lock, failed); 483 #endif 484 lock_profile_obtain_lock_failed(&rw->lock_object, 485 &contested, &waittime); 486 487 for (;;) { 488 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) 489 break; 490 #ifdef KDTRACE_HOOKS 491 lda.spin_cnt++; 492 #endif 493 494 #ifdef ADAPTIVE_RWLOCKS 495 /* 496 * If the owner is running on another CPU, 
spin until 497 * the owner stops running or the state of the lock 498 * changes. 499 */ 500 if ((v & RW_LOCK_READ) == 0) { 501 owner = (struct thread *)RW_OWNER(v); 502 if (TD_IS_RUNNING(owner)) { 503 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 504 CTR3(KTR_LOCK, 505 "%s: spinning on %p held by %p", 506 __func__, rw, owner); 507 KTR_STATE1(KTR_SCHED, "thread", 508 sched_tdname(curthread), "spinning", 509 "lockname:\"%s\"", rw->lock_object.lo_name); 510 do { 511 lock_delay(&lda); 512 v = RW_READ_VALUE(rw); 513 owner = lv_rw_wowner(v); 514 } while (owner != NULL && TD_IS_RUNNING(owner)); 515 KTR_STATE0(KTR_SCHED, "thread", 516 sched_tdname(curthread), "running"); 517 continue; 518 } 519 } else { 520 if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) { 521 MPASS(!__rw_can_read(td, v, false)); 522 lock_delay_spin(2); 523 v = RW_READ_VALUE(rw); 524 continue; 525 } 526 if (spintries < rowner_retries) { 527 spintries++; 528 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 529 "spinning", "lockname:\"%s\"", 530 rw->lock_object.lo_name); 531 n = RW_READERS(v); 532 for (i = 0; i < rowner_loops; i += n) { 533 lock_delay_spin(n); 534 v = RW_READ_VALUE(rw); 535 if (!(v & RW_LOCK_READ)) 536 break; 537 n = RW_READERS(v); 538 if (n == 0) 539 break; 540 if (__rw_can_read(td, v, false)) 541 break; 542 } 543 #ifdef KDTRACE_HOOKS 544 lda.spin_cnt += rowner_loops - i; 545 #endif 546 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 547 "running"); 548 if (i < rowner_loops) 549 continue; 550 } 551 } 552 #endif 553 554 /* 555 * Okay, now it's the hard case. Some other thread already 556 * has a write lock or there are write waiters present, 557 * acquire the turnstile lock so we can begin the process 558 * of blocking. 559 */ 560 ts = turnstile_trywait(&rw->lock_object); 561 562 /* 563 * The lock might have been released while we spun, so 564 * recheck its state and restart the loop if needed. 
565 */ 566 v = RW_READ_VALUE(rw); 567 retry_ts: 568 if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) || 569 __rw_can_read(td, v, false)) { 570 turnstile_cancel(ts); 571 continue; 572 } 573 574 owner = lv_rw_wowner(v); 575 576 #ifdef ADAPTIVE_RWLOCKS 577 /* 578 * The current lock owner might have started executing 579 * on another CPU (or the lock could have changed 580 * owners) while we were waiting on the turnstile 581 * chain lock. If so, drop the turnstile lock and try 582 * again. 583 */ 584 if (owner != NULL) { 585 if (TD_IS_RUNNING(owner)) { 586 turnstile_cancel(ts); 587 continue; 588 } 589 } 590 #endif 591 592 /* 593 * The lock is held in write mode or it already has waiters. 594 */ 595 MPASS(!__rw_can_read(td, v, false)); 596 597 /* 598 * If the RW_LOCK_READ_WAITERS flag is already set, then 599 * we can go ahead and block. If it is not set then try 600 * to set it. If we fail to set it drop the turnstile 601 * lock and restart the loop. 602 */ 603 if (!(v & RW_LOCK_READ_WAITERS)) { 604 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 605 v | RW_LOCK_READ_WAITERS)) 606 goto retry_ts; 607 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 608 CTR2(KTR_LOCK, "%s: %p set read waiters flag", 609 __func__, rw); 610 } 611 612 /* 613 * We were unable to acquire the lock and the read waiters 614 * flag is set, so we must block on the turnstile. 
615 */ 616 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 617 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 618 rw); 619 #ifdef KDTRACE_HOOKS 620 sleep_time -= lockstat_nsecs(&rw->lock_object); 621 #endif 622 MPASS(owner == rw_owner(rw)); 623 turnstile_wait(ts, owner, TS_SHARED_QUEUE); 624 #ifdef KDTRACE_HOOKS 625 sleep_time += lockstat_nsecs(&rw->lock_object); 626 sleep_cnt++; 627 #endif 628 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 629 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 630 __func__, rw); 631 v = RW_READ_VALUE(rw); 632 } 633 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 634 if (__predict_true(!doing_lockprof)) 635 return; 636 #endif 637 #ifdef KDTRACE_HOOKS 638 all_time += lockstat_nsecs(&rw->lock_object); 639 if (sleep_time) 640 LOCKSTAT_RECORD4(rw__block, rw, sleep_time, 641 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, 642 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 643 644 /* Record only the loops spinning and not sleeping. */ 645 if (lda.spin_cnt > sleep_cnt) 646 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, 647 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, 648 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 649 out_lockstat: 650 #endif 651 /* 652 * TODO: acquire "owner of record" here. Here be turnstile dragons 653 * however. turnstiles don't like owners changing between calls to 654 * turnstile_wait() currently. 
655 */ 656 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, 657 waittime, file, line, LOCKSTAT_READER); 658 } 659 660 void 661 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 662 { 663 struct thread *td; 664 uintptr_t v; 665 666 td = curthread; 667 668 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) || 669 !TD_IS_IDLETHREAD(td), 670 ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", 671 td, rw->lock_object.lo_name, file, line)); 672 KASSERT(rw->rw_lock != RW_DESTROYED, 673 ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); 674 KASSERT(rw_wowner(rw) != td, 675 ("rw_rlock: wlock already held for %s @ %s:%d", 676 rw->lock_object.lo_name, file, line)); 677 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); 678 679 v = RW_READ_VALUE(rw); 680 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || 681 !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG))) 682 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG); 683 else 684 lock_profile_obtain_lock_success(&rw->lock_object, 0, 0, 685 file, line); 686 687 LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); 688 WITNESS_LOCK(&rw->lock_object, 0, file, line); 689 TD_LOCKS_INC(curthread); 690 } 691 692 void 693 __rw_rlock(volatile uintptr_t *c, const char *file, int line) 694 { 695 struct rwlock *rw; 696 697 rw = rwlock2rw(c); 698 __rw_rlock_int(rw LOCK_FILE_LINE_ARG); 699 } 700 701 int 702 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 703 { 704 uintptr_t x; 705 706 if (SCHEDULER_STOPPED()) 707 return (1); 708 709 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 710 ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", 711 curthread, rw->lock_object.lo_name, file, line)); 712 713 x = rw->rw_lock; 714 for (;;) { 715 KASSERT(rw->rw_lock != RW_DESTROYED, 716 ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); 717 if (!(x & RW_LOCK_READ)) 718 break; 719 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) { 720 
LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, 721 line); 722 WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); 723 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, 724 rw, 0, 0, file, line, LOCKSTAT_READER); 725 TD_LOCKS_INC(curthread); 726 curthread->td_rw_rlocks++; 727 return (1); 728 } 729 } 730 731 LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); 732 return (0); 733 } 734 735 int 736 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) 737 { 738 struct rwlock *rw; 739 740 rw = rwlock2rw(c); 741 return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG)); 742 } 743 744 static bool __always_inline 745 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp) 746 { 747 748 for (;;) { 749 if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) { 750 if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp, 751 *vp - RW_ONE_READER)) { 752 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 753 CTR4(KTR_LOCK, 754 "%s: %p succeeded %p -> %p", 755 __func__, rw, (void *)*vp, 756 (void *)(*vp - RW_ONE_READER)); 757 td->td_rw_rlocks--; 758 return (true); 759 } 760 continue; 761 } 762 break; 763 } 764 return (false); 765 } 766 767 static void __noinline 768 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v 769 LOCK_FILE_LINE_ARG_DEF) 770 { 771 struct turnstile *ts; 772 uintptr_t setv, queue; 773 774 if (SCHEDULER_STOPPED()) 775 return; 776 777 if (__rw_runlock_try(rw, td, &v)) 778 goto out_lockstat; 779 780 /* 781 * Ok, we know we have waiters and we think we are the 782 * last reader, so grab the turnstile lock. 783 */ 784 turnstile_chain_lock(&rw->lock_object); 785 v = RW_READ_VALUE(rw); 786 for (;;) { 787 if (__rw_runlock_try(rw, td, &v)) 788 break; 789 790 MPASS(v & RW_LOCK_WAITERS); 791 792 /* 793 * Try to drop our lock leaving the lock in a unlocked 794 * state. 795 * 796 * If you wanted to do explicit lock handoff you'd have to 797 * do it here. 
You'd also want to use turnstile_signal() 798 * and you'd have to handle the race where a higher 799 * priority thread blocks on the write lock before the 800 * thread you wakeup actually runs and have the new thread 801 * "steal" the lock. For now it's a lot simpler to just 802 * wakeup all of the waiters. 803 * 804 * As above, if we fail, then another thread might have 805 * acquired a read lock, so drop the turnstile lock and 806 * restart. 807 */ 808 setv = RW_UNLOCKED; 809 queue = TS_SHARED_QUEUE; 810 if (v & RW_LOCK_WRITE_WAITERS) { 811 queue = TS_EXCLUSIVE_QUEUE; 812 setv |= (v & RW_LOCK_READ_WAITERS); 813 } 814 setv |= (v & RW_LOCK_WRITE_SPINNER); 815 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv)) 816 continue; 817 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 818 CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 819 __func__, rw); 820 821 /* 822 * Ok. The lock is released and all that's left is to 823 * wake up the waiters. Note that the lock might not be 824 * free anymore, but in that case the writers will just 825 * block again if they run before the new lock holder(s) 826 * release the lock. 
827 */ 828 ts = turnstile_lookup(&rw->lock_object); 829 MPASS(ts != NULL); 830 turnstile_broadcast(ts, queue); 831 turnstile_unpend(ts); 832 td->td_rw_rlocks--; 833 break; 834 } 835 turnstile_chain_unlock(&rw->lock_object); 836 out_lockstat: 837 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); 838 } 839 840 void 841 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 842 { 843 struct thread *td; 844 uintptr_t v; 845 846 KASSERT(rw->rw_lock != RW_DESTROYED, 847 ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); 848 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); 849 WITNESS_UNLOCK(&rw->lock_object, 0, file, line); 850 LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); 851 852 td = curthread; 853 v = RW_READ_VALUE(rw); 854 855 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || 856 !__rw_runlock_try(rw, td, &v))) 857 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG); 858 else 859 lock_profile_release_lock(&rw->lock_object); 860 861 TD_LOCKS_DEC(curthread); 862 } 863 864 void 865 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) 866 { 867 struct rwlock *rw; 868 869 rw = rwlock2rw(c); 870 _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG); 871 } 872 873 #ifdef ADAPTIVE_RWLOCKS 874 static inline void 875 rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work) 876 { 877 878 if (v & RW_LOCK_WRITE_SPINNER) 879 return; 880 if (*in_critical) { 881 critical_exit(); 882 *in_critical = false; 883 (*extra_work)--; 884 } 885 } 886 #else 887 #define rw_drop_critical(v, in_critical, extra_work) do { } while (0) 888 #endif 889 890 /* 891 * This function is called when we are unable to obtain a write lock on the 892 * first try. This means that at least one other thread holds either a 893 * read or write lock. 
894 */ 895 void 896 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) 897 { 898 uintptr_t tid; 899 struct rwlock *rw; 900 struct turnstile *ts; 901 struct thread *owner; 902 #ifdef ADAPTIVE_RWLOCKS 903 int spintries = 0; 904 int i, n; 905 enum { READERS, WRITER } sleep_reason = READERS; 906 bool in_critical = false; 907 #endif 908 uintptr_t setv; 909 #ifdef LOCK_PROFILING 910 uint64_t waittime = 0; 911 int contested = 0; 912 #endif 913 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) 914 struct lock_delay_arg lda; 915 #endif 916 #ifdef KDTRACE_HOOKS 917 u_int sleep_cnt = 0; 918 int64_t sleep_time = 0; 919 int64_t all_time = 0; 920 #endif 921 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 922 uintptr_t state = 0; 923 int doing_lockprof = 0; 924 #endif 925 int extra_work = 0; 926 927 tid = (uintptr_t)curthread; 928 rw = rwlock2rw(c); 929 930 #ifdef KDTRACE_HOOKS 931 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { 932 while (v == RW_UNLOCKED) { 933 if (_rw_write_lock_fetch(rw, &v, tid)) 934 goto out_lockstat; 935 } 936 extra_work = 1; 937 doing_lockprof = 1; 938 all_time -= lockstat_nsecs(&rw->lock_object); 939 state = v; 940 } 941 #endif 942 #ifdef LOCK_PROFILING 943 extra_work = 1; 944 doing_lockprof = 1; 945 state = v; 946 #endif 947 948 if (SCHEDULER_STOPPED()) 949 return; 950 951 #if defined(ADAPTIVE_RWLOCKS) 952 lock_delay_arg_init(&lda, &rw_delay); 953 #elif defined(KDTRACE_HOOKS) 954 lock_delay_arg_init_noadapt(&lda); 955 #endif 956 if (__predict_false(v == RW_UNLOCKED)) 957 v = RW_READ_VALUE(rw); 958 959 if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) { 960 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, 961 ("%s: recursing but non-recursive rw %s @ %s:%d\n", 962 __func__, rw->lock_object.lo_name, file, line)); 963 rw->rw_recurse++; 964 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 965 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 966 CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); 967 return; 968 } 969 
970 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 971 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 972 rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); 973 974 #ifdef HWPMC_HOOKS 975 PMC_SOFT_CALL( , , lock, failed); 976 #endif 977 lock_profile_obtain_lock_failed(&rw->lock_object, 978 &contested, &waittime); 979 980 for (;;) { 981 if (v == RW_UNLOCKED) { 982 if (_rw_write_lock_fetch(rw, &v, tid)) 983 break; 984 continue; 985 } 986 #ifdef KDTRACE_HOOKS 987 lda.spin_cnt++; 988 #endif 989 990 #ifdef ADAPTIVE_RWLOCKS 991 if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) { 992 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) 993 break; 994 continue; 995 } 996 997 /* 998 * If the lock is write locked and the owner is 999 * running on another CPU, spin until the owner stops 1000 * running or the state of the lock changes. 1001 */ 1002 if (!(v & RW_LOCK_READ)) { 1003 rw_drop_critical(v, &in_critical, &extra_work); 1004 sleep_reason = WRITER; 1005 owner = lv_rw_wowner(v); 1006 if (!TD_IS_RUNNING(owner)) 1007 goto ts; 1008 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1009 CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 1010 __func__, rw, owner); 1011 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 1012 "spinning", "lockname:\"%s\"", 1013 rw->lock_object.lo_name); 1014 do { 1015 lock_delay(&lda); 1016 v = RW_READ_VALUE(rw); 1017 owner = lv_rw_wowner(v); 1018 } while (owner != NULL && TD_IS_RUNNING(owner)); 1019 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 1020 "running"); 1021 continue; 1022 } else if (RW_READERS(v) > 0) { 1023 sleep_reason = READERS; 1024 if (spintries == rowner_retries) 1025 goto ts; 1026 if (!(v & RW_LOCK_WRITE_SPINNER)) { 1027 if (!in_critical) { 1028 critical_enter(); 1029 in_critical = true; 1030 extra_work++; 1031 } 1032 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 1033 v | RW_LOCK_WRITE_SPINNER)) { 1034 critical_exit(); 1035 in_critical = false; 1036 extra_work--; 1037 continue; 1038 } 1039 } 1040 spintries++; 1041 
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 1042 "spinning", "lockname:\"%s\"", 1043 rw->lock_object.lo_name); 1044 n = RW_READERS(v); 1045 for (i = 0; i < rowner_loops; i += n) { 1046 lock_delay_spin(n); 1047 v = RW_READ_VALUE(rw); 1048 if (!(v & RW_LOCK_WRITE_SPINNER)) 1049 break; 1050 if (!(v & RW_LOCK_READ)) 1051 break; 1052 n = RW_READERS(v); 1053 if (n == 0) 1054 break; 1055 } 1056 #ifdef KDTRACE_HOOKS 1057 lda.spin_cnt += i; 1058 #endif 1059 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 1060 "running"); 1061 if (i < rowner_loops) 1062 continue; 1063 } 1064 ts: 1065 #endif 1066 ts = turnstile_trywait(&rw->lock_object); 1067 v = RW_READ_VALUE(rw); 1068 retry_ts: 1069 owner = lv_rw_wowner(v); 1070 1071 #ifdef ADAPTIVE_RWLOCKS 1072 /* 1073 * The current lock owner might have started executing 1074 * on another CPU (or the lock could have changed 1075 * owners) while we were waiting on the turnstile 1076 * chain lock. If so, drop the turnstile lock and try 1077 * again. 1078 */ 1079 if (owner != NULL) { 1080 if (TD_IS_RUNNING(owner)) { 1081 turnstile_cancel(ts); 1082 rw_drop_critical(v, &in_critical, &extra_work); 1083 continue; 1084 } 1085 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) { 1086 turnstile_cancel(ts); 1087 rw_drop_critical(v, &in_critical, &extra_work); 1088 continue; 1089 } 1090 #endif 1091 /* 1092 * Check for the waiters flags about this rwlock. 1093 * If the lock was released, without maintain any pending 1094 * waiters queue, simply try to acquire it. 1095 * If a pending waiters queue is present, claim the lock 1096 * ownership and maintain the pending queue. 
1097 */ 1098 setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); 1099 if ((v & ~setv) == RW_UNLOCKED) { 1100 setv &= ~RW_LOCK_WRITE_SPINNER; 1101 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) { 1102 if (setv) 1103 turnstile_claim(ts); 1104 else 1105 turnstile_cancel(ts); 1106 break; 1107 } 1108 goto retry_ts; 1109 } 1110 1111 #ifdef ADAPTIVE_RWLOCKS 1112 if (in_critical) { 1113 if ((v & RW_LOCK_WRITE_SPINNER) || 1114 !((v & RW_LOCK_WRITE_WAITERS))) { 1115 setv = v & ~RW_LOCK_WRITE_SPINNER; 1116 setv |= RW_LOCK_WRITE_WAITERS; 1117 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv)) 1118 goto retry_ts; 1119 } 1120 critical_exit(); 1121 in_critical = false; 1122 extra_work--; 1123 } else { 1124 #endif 1125 /* 1126 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 1127 * set it. If we fail to set it, then loop back and try 1128 * again. 1129 */ 1130 if (!(v & RW_LOCK_WRITE_WAITERS)) { 1131 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 1132 v | RW_LOCK_WRITE_WAITERS)) 1133 goto retry_ts; 1134 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1135 CTR2(KTR_LOCK, "%s: %p set write waiters flag", 1136 __func__, rw); 1137 } 1138 #ifdef ADAPTIVE_RWLOCKS 1139 } 1140 #endif 1141 /* 1142 * We were unable to acquire the lock and the write waiters 1143 * flag is set, so we must block on the turnstile. 
1144 */ 1145 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1146 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 1147 rw); 1148 #ifdef KDTRACE_HOOKS 1149 sleep_time -= lockstat_nsecs(&rw->lock_object); 1150 #endif 1151 MPASS(owner == rw_owner(rw)); 1152 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE); 1153 #ifdef KDTRACE_HOOKS 1154 sleep_time += lockstat_nsecs(&rw->lock_object); 1155 sleep_cnt++; 1156 #endif 1157 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1158 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 1159 __func__, rw); 1160 #ifdef ADAPTIVE_RWLOCKS 1161 spintries = 0; 1162 #endif 1163 v = RW_READ_VALUE(rw); 1164 } 1165 if (__predict_true(!extra_work)) 1166 return; 1167 #ifdef ADAPTIVE_RWLOCKS 1168 if (in_critical) 1169 critical_exit(); 1170 #endif 1171 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 1172 if (__predict_true(!doing_lockprof)) 1173 return; 1174 #endif 1175 #ifdef KDTRACE_HOOKS 1176 all_time += lockstat_nsecs(&rw->lock_object); 1177 if (sleep_time) 1178 LOCKSTAT_RECORD4(rw__block, rw, sleep_time, 1179 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, 1180 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 1181 1182 /* Record only the loops spinning and not sleeping. */ 1183 if (lda.spin_cnt > sleep_cnt) 1184 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, 1185 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, 1186 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 1187 out_lockstat: 1188 #endif 1189 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, 1190 waittime, file, line, LOCKSTAT_WRITER); 1191 } 1192 1193 /* 1194 * This function is called if lockstat is active or the first try at releasing 1195 * a write lock failed. The latter means that the lock is recursed or one of 1196 * the 2 waiter bits must be set indicating that at least one thread is waiting 1197 * on this lock. 
1198 */ 1199 void 1200 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) 1201 { 1202 struct rwlock *rw; 1203 struct turnstile *ts; 1204 uintptr_t tid, setv; 1205 int queue; 1206 1207 tid = (uintptr_t)curthread; 1208 if (SCHEDULER_STOPPED()) 1209 return; 1210 1211 rw = rwlock2rw(c); 1212 if (__predict_false(v == tid)) 1213 v = RW_READ_VALUE(rw); 1214 1215 if (v & RW_LOCK_WRITER_RECURSED) { 1216 if (--(rw->rw_recurse) == 0) 1217 atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 1218 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1219 CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); 1220 return; 1221 } 1222 1223 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER); 1224 if (v == tid && _rw_write_unlock(rw, tid)) 1225 return; 1226 1227 KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 1228 ("%s: neither of the waiter flags are set", __func__)); 1229 1230 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1231 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 1232 1233 turnstile_chain_lock(&rw->lock_object); 1234 1235 /* 1236 * Use the same algo as sx locks for now. Prefer waking up shared 1237 * waiters if we have any over writers. This is probably not ideal. 1238 * 1239 * 'v' is the value we are going to write back to rw_lock. If we 1240 * have waiters on both queues, we need to preserve the state of 1241 * the waiter flag for the queue we don't wake up. For now this is 1242 * hardcoded for the algorithm mentioned above. 1243 * 1244 * In the case of both readers and writers waiting we wakeup the 1245 * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a 1246 * new writer comes in before a reader it will claim the lock up 1247 * above. There is probably a potential priority inversion in 1248 * there that could be worked around either by waking both queues 1249 * of waiters or doing some complicated lock handoff gymnastics. 
1250 */ 1251 setv = RW_UNLOCKED; 1252 v = RW_READ_VALUE(rw); 1253 queue = TS_SHARED_QUEUE; 1254 if (v & RW_LOCK_WRITE_WAITERS) { 1255 queue = TS_EXCLUSIVE_QUEUE; 1256 setv |= (v & RW_LOCK_READ_WAITERS); 1257 } 1258 atomic_store_rel_ptr(&rw->rw_lock, setv); 1259 1260 /* Wake up all waiters for the specific queue. */ 1261 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1262 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 1263 queue == TS_SHARED_QUEUE ? "read" : "write"); 1264 1265 ts = turnstile_lookup(&rw->lock_object); 1266 MPASS(ts != NULL); 1267 turnstile_broadcast(ts, queue); 1268 turnstile_unpend(ts); 1269 turnstile_chain_unlock(&rw->lock_object); 1270 } 1271 1272 /* 1273 * Attempt to do a non-blocking upgrade from a read lock to a write 1274 * lock. This will only succeed if this thread holds a single read 1275 * lock. Returns true if the upgrade succeeded and false otherwise. 1276 */ 1277 int 1278 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 1279 { 1280 uintptr_t v, setv, tid; 1281 struct turnstile *ts; 1282 int success; 1283 1284 if (SCHEDULER_STOPPED()) 1285 return (1); 1286 1287 KASSERT(rw->rw_lock != RW_DESTROYED, 1288 ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); 1289 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); 1290 1291 /* 1292 * Attempt to switch from one reader to a writer. If there 1293 * are any write waiters, then we will have to lock the 1294 * turnstile first to prevent races with another writer 1295 * calling turnstile_wait() before we have claimed this 1296 * turnstile. So, do the simple case of no waiters first. 1297 */ 1298 tid = (uintptr_t)curthread; 1299 success = 0; 1300 v = RW_READ_VALUE(rw); 1301 for (;;) { 1302 if (RW_READERS(v) > 1) 1303 break; 1304 if (!(v & RW_LOCK_WAITERS)) { 1305 success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid); 1306 if (!success) 1307 continue; 1308 break; 1309 } 1310 1311 /* 1312 * Ok, we think we have waiters, so lock the turnstile. 
1313 */ 1314 ts = turnstile_trywait(&rw->lock_object); 1315 v = RW_READ_VALUE(rw); 1316 retry_ts: 1317 if (RW_READERS(v) > 1) { 1318 turnstile_cancel(ts); 1319 break; 1320 } 1321 /* 1322 * Try to switch from one reader to a writer again. This time 1323 * we honor the current state of the waiters flags. 1324 * If we obtain the lock with the flags set, then claim 1325 * ownership of the turnstile. 1326 */ 1327 setv = tid | (v & RW_LOCK_WAITERS); 1328 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv); 1329 if (success) { 1330 if (v & RW_LOCK_WAITERS) 1331 turnstile_claim(ts); 1332 else 1333 turnstile_cancel(ts); 1334 break; 1335 } 1336 goto retry_ts; 1337 } 1338 LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); 1339 if (success) { 1340 curthread->td_rw_rlocks--; 1341 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 1342 file, line); 1343 LOCKSTAT_RECORD0(rw__upgrade, rw); 1344 } 1345 return (success); 1346 } 1347 1348 int 1349 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) 1350 { 1351 struct rwlock *rw; 1352 1353 rw = rwlock2rw(c); 1354 return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG)); 1355 } 1356 1357 /* 1358 * Downgrade a write lock into a single read lock. 1359 */ 1360 void 1361 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 1362 { 1363 struct turnstile *ts; 1364 uintptr_t tid, v; 1365 int rwait, wwait; 1366 1367 if (SCHEDULER_STOPPED()) 1368 return; 1369 1370 KASSERT(rw->rw_lock != RW_DESTROYED, 1371 ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); 1372 __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line); 1373 #ifndef INVARIANTS 1374 if (rw_recursed(rw)) 1375 panic("downgrade of a recursed lock"); 1376 #endif 1377 1378 WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); 1379 1380 /* 1381 * Convert from a writer to a single reader. First we handle 1382 * the easy case with no waiters. If there are any waiters, we 1383 * lock the turnstile and "disown" the lock. 
1384 */ 1385 tid = (uintptr_t)curthread; 1386 if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) 1387 goto out; 1388 1389 /* 1390 * Ok, we think we have waiters, so lock the turnstile so we can 1391 * read the waiter flags without any races. 1392 */ 1393 turnstile_chain_lock(&rw->lock_object); 1394 v = rw->rw_lock & RW_LOCK_WAITERS; 1395 rwait = v & RW_LOCK_READ_WAITERS; 1396 wwait = v & RW_LOCK_WRITE_WAITERS; 1397 MPASS(rwait | wwait); 1398 1399 /* 1400 * Downgrade from a write lock while preserving waiters flag 1401 * and give up ownership of the turnstile. 1402 */ 1403 ts = turnstile_lookup(&rw->lock_object); 1404 MPASS(ts != NULL); 1405 if (!wwait) 1406 v &= ~RW_LOCK_READ_WAITERS; 1407 atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); 1408 /* 1409 * Wake other readers if there are no writers pending. Otherwise they 1410 * won't be able to acquire the lock anyway. 1411 */ 1412 if (rwait && !wwait) { 1413 turnstile_broadcast(ts, TS_SHARED_QUEUE); 1414 turnstile_unpend(ts); 1415 } else 1416 turnstile_disown(ts); 1417 turnstile_chain_unlock(&rw->lock_object); 1418 out: 1419 curthread->td_rw_rlocks++; 1420 LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); 1421 LOCKSTAT_RECORD0(rw__downgrade, rw); 1422 } 1423 1424 void 1425 __rw_downgrade(volatile uintptr_t *c, const char *file, int line) 1426 { 1427 struct rwlock *rw; 1428 1429 rw = rwlock2rw(c); 1430 __rw_downgrade_int(rw LOCK_FILE_LINE_ARG); 1431 } 1432 1433 #ifdef INVARIANT_SUPPORT 1434 #ifndef INVARIANTS 1435 #undef __rw_assert 1436 #endif 1437 1438 /* 1439 * In the non-WITNESS case, rw_assert() can only detect that at least 1440 * *some* thread owns an rlock, but it cannot guarantee that *this* 1441 * thread owns an rlock. 
1442 */ 1443 void 1444 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) 1445 { 1446 const struct rwlock *rw; 1447 1448 if (SCHEDULER_STOPPED()) 1449 return; 1450 1451 rw = rwlock2rw(c); 1452 1453 switch (what) { 1454 case RA_LOCKED: 1455 case RA_LOCKED | RA_RECURSED: 1456 case RA_LOCKED | RA_NOTRECURSED: 1457 case RA_RLOCKED: 1458 case RA_RLOCKED | RA_RECURSED: 1459 case RA_RLOCKED | RA_NOTRECURSED: 1460 #ifdef WITNESS 1461 witness_assert(&rw->lock_object, what, file, line); 1462 #else 1463 /* 1464 * If some other thread has a write lock or we have one 1465 * and are asserting a read lock, fail. Also, if no one 1466 * has a lock at all, fail. 1467 */ 1468 if (rw->rw_lock == RW_UNLOCKED || 1469 (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || 1470 rw_wowner(rw) != curthread))) 1471 panic("Lock %s not %slocked @ %s:%d\n", 1472 rw->lock_object.lo_name, (what & RA_RLOCKED) ? 1473 "read " : "", file, line); 1474 1475 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { 1476 if (rw_recursed(rw)) { 1477 if (what & RA_NOTRECURSED) 1478 panic("Lock %s recursed @ %s:%d\n", 1479 rw->lock_object.lo_name, file, 1480 line); 1481 } else if (what & RA_RECURSED) 1482 panic("Lock %s not recursed @ %s:%d\n", 1483 rw->lock_object.lo_name, file, line); 1484 } 1485 #endif 1486 break; 1487 case RA_WLOCKED: 1488 case RA_WLOCKED | RA_RECURSED: 1489 case RA_WLOCKED | RA_NOTRECURSED: 1490 if (rw_wowner(rw) != curthread) 1491 panic("Lock %s not exclusively locked @ %s:%d\n", 1492 rw->lock_object.lo_name, file, line); 1493 if (rw_recursed(rw)) { 1494 if (what & RA_NOTRECURSED) 1495 panic("Lock %s recursed @ %s:%d\n", 1496 rw->lock_object.lo_name, file, line); 1497 } else if (what & RA_RECURSED) 1498 panic("Lock %s not recursed @ %s:%d\n", 1499 rw->lock_object.lo_name, file, line); 1500 break; 1501 case RA_UNLOCKED: 1502 #ifdef WITNESS 1503 witness_assert(&rw->lock_object, what, file, line); 1504 #else 1505 /* 1506 * If we hold a write lock fail. 
We can't reliably check 1507 * to see if we hold a read lock or not. 1508 */ 1509 if (rw_wowner(rw) == curthread) 1510 panic("Lock %s exclusively locked @ %s:%d\n", 1511 rw->lock_object.lo_name, file, line); 1512 #endif 1513 break; 1514 default: 1515 panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 1516 line); 1517 } 1518 } 1519 #endif /* INVARIANT_SUPPORT */ 1520 1521 #ifdef DDB 1522 void 1523 db_show_rwlock(const struct lock_object *lock) 1524 { 1525 const struct rwlock *rw; 1526 struct thread *td; 1527 1528 rw = (const struct rwlock *)lock; 1529 1530 db_printf(" state: "); 1531 if (rw->rw_lock == RW_UNLOCKED) 1532 db_printf("UNLOCKED\n"); 1533 else if (rw->rw_lock == RW_DESTROYED) { 1534 db_printf("DESTROYED\n"); 1535 return; 1536 } else if (rw->rw_lock & RW_LOCK_READ) 1537 db_printf("RLOCK: %ju locks\n", 1538 (uintmax_t)(RW_READERS(rw->rw_lock))); 1539 else { 1540 td = rw_wowner(rw); 1541 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 1542 td->td_tid, td->td_proc->p_pid, td->td_name); 1543 if (rw_recursed(rw)) 1544 db_printf(" recursed: %u\n", rw->rw_recurse); 1545 } 1546 db_printf(" waiters: "); 1547 switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 1548 case RW_LOCK_READ_WAITERS: 1549 db_printf("readers\n"); 1550 break; 1551 case RW_LOCK_WRITE_WAITERS: 1552 db_printf("writers\n"); 1553 break; 1554 case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 1555 db_printf("readers and writers\n"); 1556 break; 1557 default: 1558 db_printf("none\n"); 1559 break; 1560 } 1561 } 1562 1563 #endif 1564