/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ddb.h" 36 #include "opt_hwpmc_hooks.h" 37 #include "opt_no_adaptive_rwlocks.h" 38 39 #include <sys/param.h> 40 #include <sys/kdb.h> 41 #include <sys/ktr.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/rwlock.h> 47 #include <sys/sched.h> 48 #include <sys/smp.h> 49 #include <sys/sysctl.h> 50 #include <sys/systm.h> 51 #include <sys/turnstile.h> 52 53 #include <machine/cpu.h> 54 55 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) 56 #define ADAPTIVE_RWLOCKS 57 #endif 58 59 #ifdef HWPMC_HOOKS 60 #include <sys/pmckern.h> 61 PMC_SOFT_DECLARE( , , lock, failed); 62 #endif 63 64 /* 65 * Return the rwlock address when the lock cookie address is provided. 66 * This functionality assumes that struct rwlock* have a member named rw_lock. 67 */ 68 #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) 69 70 #ifdef DDB 71 #include <ddb/ddb.h> 72 73 static void db_show_rwlock(const struct lock_object *lock); 74 #endif 75 static void assert_rw(const struct lock_object *lock, int what); 76 static void lock_rw(struct lock_object *lock, uintptr_t how); 77 #ifdef KDTRACE_HOOKS 78 static int owner_rw(const struct lock_object *lock, struct thread **owner); 79 #endif 80 static uintptr_t unlock_rw(struct lock_object *lock); 81 82 struct lock_class lock_class_rw = { 83 .lc_name = "rw", 84 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, 85 .lc_assert = assert_rw, 86 #ifdef DDB 87 .lc_ddb_show = db_show_rwlock, 88 #endif 89 .lc_lock = lock_rw, 90 .lc_unlock = unlock_rw, 91 #ifdef KDTRACE_HOOKS 92 .lc_owner = owner_rw, 93 #endif 94 }; 95 96 #ifdef ADAPTIVE_RWLOCKS 97 #ifdef RWLOCK_CUSTOM_BACKOFF 98 static u_short __read_frequently rowner_retries; 99 static u_short __read_frequently rowner_loops; 100 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, 101 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 102 "rwlock debugging"); 103 
SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); 104 SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); 105 106 static struct lock_delay_config __read_frequently rw_delay; 107 108 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base, 109 0, ""); 110 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, 111 0, ""); 112 113 static void 114 rw_lock_delay_init(void *arg __unused) 115 { 116 117 lock_delay_default_init(&rw_delay); 118 rowner_retries = 10; 119 rowner_loops = max(10000, rw_delay.max); 120 } 121 LOCK_DELAY_SYSINIT(rw_lock_delay_init); 122 #else 123 #define rw_delay locks_delay 124 #define rowner_retries locks_delay_retries 125 #define rowner_loops locks_delay_loops 126 #endif 127 #endif 128 129 /* 130 * Return a pointer to the owning thread if the lock is write-locked or 131 * NULL if the lock is unlocked or read-locked. 132 */ 133 134 #define lv_rw_wowner(v) \ 135 ((v) & RW_LOCK_READ ? NULL : \ 136 (struct thread *)RW_OWNER((v))) 137 138 #define rw_wowner(rw) lv_rw_wowner(RW_READ_VALUE(rw)) 139 140 /* 141 * Returns if a write owner is recursed. Write ownership is not assured 142 * here and should be previously checked. 143 */ 144 #define rw_recursed(rw) ((rw)->rw_recurse != 0) 145 146 /* 147 * Return true if curthread helds the lock. 148 */ 149 #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) 150 151 /* 152 * Return a pointer to the owning thread for this lock who should receive 153 * any priority lent by threads that block on this lock. Currently this 154 * is identical to rw_wowner(). 
155 */ 156 #define rw_owner(rw) rw_wowner(rw) 157 158 #ifndef INVARIANTS 159 #define __rw_assert(c, what, file, line) 160 #endif 161 162 void 163 assert_rw(const struct lock_object *lock, int what) 164 { 165 166 rw_assert((const struct rwlock *)lock, what); 167 } 168 169 void 170 lock_rw(struct lock_object *lock, uintptr_t how) 171 { 172 struct rwlock *rw; 173 174 rw = (struct rwlock *)lock; 175 if (how) 176 rw_rlock(rw); 177 else 178 rw_wlock(rw); 179 } 180 181 uintptr_t 182 unlock_rw(struct lock_object *lock) 183 { 184 struct rwlock *rw; 185 186 rw = (struct rwlock *)lock; 187 rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); 188 if (rw->rw_lock & RW_LOCK_READ) { 189 rw_runlock(rw); 190 return (1); 191 } else { 192 rw_wunlock(rw); 193 return (0); 194 } 195 } 196 197 #ifdef KDTRACE_HOOKS 198 int 199 owner_rw(const struct lock_object *lock, struct thread **owner) 200 { 201 const struct rwlock *rw = (const struct rwlock *)lock; 202 uintptr_t x = rw->rw_lock; 203 204 *owner = rw_wowner(rw); 205 return ((x & RW_LOCK_READ) != 0 ? 
(RW_READERS(x) != 0) : 206 (*owner != NULL)); 207 } 208 #endif 209 210 void 211 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) 212 { 213 struct rwlock *rw; 214 int flags; 215 216 rw = rwlock2rw(c); 217 218 MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | 219 RW_RECURSE | RW_NEW)) == 0); 220 ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, 221 ("%s: rw_lock not aligned for %s: %p", __func__, name, 222 &rw->rw_lock)); 223 224 flags = LO_UPGRADABLE; 225 if (opts & RW_DUPOK) 226 flags |= LO_DUPOK; 227 if (opts & RW_NOPROFILE) 228 flags |= LO_NOPROFILE; 229 if (!(opts & RW_NOWITNESS)) 230 flags |= LO_WITNESS; 231 if (opts & RW_RECURSE) 232 flags |= LO_RECURSABLE; 233 if (opts & RW_QUIET) 234 flags |= LO_QUIET; 235 if (opts & RW_NEW) 236 flags |= LO_NEW; 237 238 lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); 239 rw->rw_lock = RW_UNLOCKED; 240 rw->rw_recurse = 0; 241 } 242 243 void 244 _rw_destroy(volatile uintptr_t *c) 245 { 246 struct rwlock *rw; 247 248 rw = rwlock2rw(c); 249 250 KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); 251 KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); 252 rw->rw_lock = RW_DESTROYED; 253 lock_destroy(&rw->lock_object); 254 } 255 256 void 257 rw_sysinit(void *arg) 258 { 259 struct rw_args *args; 260 261 args = arg; 262 rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, 263 args->ra_flags); 264 } 265 266 int 267 _rw_wowned(const volatile uintptr_t *c) 268 { 269 270 return (rw_wowner(rwlock2rw(c)) == curthread); 271 } 272 273 void 274 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) 275 { 276 struct rwlock *rw; 277 uintptr_t tid, v; 278 279 rw = rwlock2rw(c); 280 281 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || 282 !TD_IS_IDLETHREAD(curthread), 283 ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", 284 curthread, rw->lock_object.lo_name, file, line)); 285 KASSERT(rw->rw_lock != RW_DESTROYED, 286 ("rw_wlock() of destroyed rwlock @ 
%s:%d", file, line)); 287 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 288 line, NULL); 289 tid = (uintptr_t)curthread; 290 v = RW_UNLOCKED; 291 if (!_rw_write_lock_fetch(rw, &v, tid)) 292 _rw_wlock_hard(rw, v, file, line); 293 else 294 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 295 0, 0, file, line, LOCKSTAT_WRITER); 296 297 LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); 298 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 299 TD_LOCKS_INC(curthread); 300 } 301 302 int 303 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 304 { 305 struct thread *td; 306 uintptr_t tid, v; 307 int rval; 308 bool recursed; 309 310 td = curthread; 311 tid = (uintptr_t)td; 312 if (SCHEDULER_STOPPED_TD(td)) 313 return (1); 314 315 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), 316 ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", 317 curthread, rw->lock_object.lo_name, file, line)); 318 KASSERT(rw->rw_lock != RW_DESTROYED, 319 ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); 320 321 rval = 1; 322 recursed = false; 323 v = RW_UNLOCKED; 324 for (;;) { 325 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) 326 break; 327 if (v == RW_UNLOCKED) 328 continue; 329 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) { 330 rw->rw_recurse++; 331 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 332 break; 333 } 334 rval = 0; 335 break; 336 } 337 338 LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); 339 if (rval) { 340 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 341 file, line); 342 if (!recursed) 343 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, 344 rw, 0, 0, file, line, LOCKSTAT_WRITER); 345 TD_LOCKS_INC(curthread); 346 } 347 return (rval); 348 } 349 350 int 351 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) 352 { 353 struct rwlock *rw; 354 355 rw = rwlock2rw(c); 356 return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG)); 
357 } 358 359 void 360 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) 361 { 362 struct rwlock *rw; 363 364 rw = rwlock2rw(c); 365 366 KASSERT(rw->rw_lock != RW_DESTROYED, 367 ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); 368 __rw_assert(c, RA_WLOCKED, file, line); 369 WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 370 LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, 371 line); 372 373 #ifdef LOCK_PROFILING 374 _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line); 375 #else 376 __rw_wunlock(rw, curthread, file, line); 377 #endif 378 379 TD_LOCKS_DEC(curthread); 380 } 381 382 /* 383 * Determines whether a new reader can acquire a lock. Succeeds if the 384 * reader already owns a read lock and the lock is locked for read to 385 * prevent deadlock from reader recursion. Also succeeds if the lock 386 * is unlocked and has no writer waiters or spinners. Failing otherwise 387 * prioritizes writers before readers. 388 */ 389 static bool __always_inline 390 __rw_can_read(struct thread *td, uintptr_t v, bool fp) 391 { 392 393 if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) 394 == RW_LOCK_READ) 395 return (true); 396 if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ)) 397 return (true); 398 return (false); 399 } 400 401 static bool __always_inline 402 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp 403 LOCK_FILE_LINE_ARG_DEF) 404 { 405 406 /* 407 * Handle the easy case. If no other thread has a write 408 * lock, then try to bump up the count of read locks. Note 409 * that we have to preserve the current state of the 410 * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 411 * read lock, then rw_lock must have changed, so restart 412 * the loop. Note that this handles the case of a 413 * completely unlocked rwlock since such a lock is encoded 414 * as a read lock with no waiters. 
415 */ 416 while (__rw_can_read(td, *vp, fp)) { 417 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp, 418 *vp + RW_ONE_READER)) { 419 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 420 CTR4(KTR_LOCK, 421 "%s: %p succeed %p -> %p", __func__, 422 rw, (void *)*vp, 423 (void *)(*vp + RW_ONE_READER)); 424 td->td_rw_rlocks++; 425 return (true); 426 } 427 } 428 return (false); 429 } 430 431 static void __noinline 432 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v 433 LOCK_FILE_LINE_ARG_DEF) 434 { 435 struct turnstile *ts; 436 struct thread *owner; 437 #ifdef ADAPTIVE_RWLOCKS 438 int spintries = 0; 439 int i, n; 440 #endif 441 #ifdef LOCK_PROFILING 442 uint64_t waittime = 0; 443 int contested = 0; 444 #endif 445 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) 446 struct lock_delay_arg lda; 447 #endif 448 #ifdef KDTRACE_HOOKS 449 u_int sleep_cnt = 0; 450 int64_t sleep_time = 0; 451 int64_t all_time = 0; 452 #endif 453 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 454 uintptr_t state = 0; 455 int doing_lockprof = 0; 456 #endif 457 458 #ifdef KDTRACE_HOOKS 459 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { 460 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) 461 goto out_lockstat; 462 doing_lockprof = 1; 463 all_time -= lockstat_nsecs(&rw->lock_object); 464 state = v; 465 } 466 #endif 467 #ifdef LOCK_PROFILING 468 doing_lockprof = 1; 469 state = v; 470 #endif 471 472 if (SCHEDULER_STOPPED()) 473 return; 474 475 #if defined(ADAPTIVE_RWLOCKS) 476 lock_delay_arg_init(&lda, &rw_delay); 477 #elif defined(KDTRACE_HOOKS) 478 lock_delay_arg_init_noadapt(&lda); 479 #endif 480 481 #ifdef HWPMC_HOOKS 482 PMC_SOFT_CALL( , , lock, failed); 483 #endif 484 lock_profile_obtain_lock_failed(&rw->lock_object, false, 485 &contested, &waittime); 486 487 for (;;) { 488 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) 489 break; 490 #ifdef KDTRACE_HOOKS 491 lda.spin_cnt++; 492 #endif 493 494 #ifdef ADAPTIVE_RWLOCKS 495 /* 496 * If the owner is running on 
another CPU, spin until 497 * the owner stops running or the state of the lock 498 * changes. 499 */ 500 if ((v & RW_LOCK_READ) == 0) { 501 owner = (struct thread *)RW_OWNER(v); 502 if (TD_IS_RUNNING(owner)) { 503 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 504 CTR3(KTR_LOCK, 505 "%s: spinning on %p held by %p", 506 __func__, rw, owner); 507 KTR_STATE1(KTR_SCHED, "thread", 508 sched_tdname(curthread), "spinning", 509 "lockname:\"%s\"", rw->lock_object.lo_name); 510 do { 511 lock_delay(&lda); 512 v = RW_READ_VALUE(rw); 513 owner = lv_rw_wowner(v); 514 } while (owner != NULL && TD_IS_RUNNING(owner)); 515 KTR_STATE0(KTR_SCHED, "thread", 516 sched_tdname(curthread), "running"); 517 continue; 518 } 519 } else { 520 if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) { 521 MPASS(!__rw_can_read(td, v, false)); 522 lock_delay_spin(2); 523 v = RW_READ_VALUE(rw); 524 continue; 525 } 526 if (spintries < rowner_retries) { 527 spintries++; 528 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 529 "spinning", "lockname:\"%s\"", 530 rw->lock_object.lo_name); 531 n = RW_READERS(v); 532 for (i = 0; i < rowner_loops; i += n) { 533 lock_delay_spin(n); 534 v = RW_READ_VALUE(rw); 535 if (!(v & RW_LOCK_READ)) 536 break; 537 n = RW_READERS(v); 538 if (n == 0) 539 break; 540 if (__rw_can_read(td, v, false)) 541 break; 542 } 543 #ifdef KDTRACE_HOOKS 544 lda.spin_cnt += rowner_loops - i; 545 #endif 546 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 547 "running"); 548 if (i < rowner_loops) 549 continue; 550 } 551 } 552 #endif 553 554 /* 555 * Okay, now it's the hard case. Some other thread already 556 * has a write lock or there are write waiters present, 557 * acquire the turnstile lock so we can begin the process 558 * of blocking. 559 */ 560 ts = turnstile_trywait(&rw->lock_object); 561 562 /* 563 * The lock might have been released while we spun, so 564 * recheck its state and restart the loop if needed. 
565 */ 566 v = RW_READ_VALUE(rw); 567 retry_ts: 568 if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) || 569 __rw_can_read(td, v, false)) { 570 turnstile_cancel(ts); 571 continue; 572 } 573 574 owner = lv_rw_wowner(v); 575 576 #ifdef ADAPTIVE_RWLOCKS 577 /* 578 * The current lock owner might have started executing 579 * on another CPU (or the lock could have changed 580 * owners) while we were waiting on the turnstile 581 * chain lock. If so, drop the turnstile lock and try 582 * again. 583 */ 584 if (owner != NULL) { 585 if (TD_IS_RUNNING(owner)) { 586 turnstile_cancel(ts); 587 continue; 588 } 589 } 590 #endif 591 592 /* 593 * The lock is held in write mode or it already has waiters. 594 */ 595 MPASS(!__rw_can_read(td, v, false)); 596 597 /* 598 * If the RW_LOCK_READ_WAITERS flag is already set, then 599 * we can go ahead and block. If it is not set then try 600 * to set it. If we fail to set it drop the turnstile 601 * lock and restart the loop. 602 */ 603 if (!(v & RW_LOCK_READ_WAITERS)) { 604 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 605 v | RW_LOCK_READ_WAITERS)) 606 goto retry_ts; 607 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 608 CTR2(KTR_LOCK, "%s: %p set read waiters flag", 609 __func__, rw); 610 } 611 612 /* 613 * We were unable to acquire the lock and the read waiters 614 * flag is set, so we must block on the turnstile. 
615 */ 616 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 617 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 618 rw); 619 #ifdef KDTRACE_HOOKS 620 sleep_time -= lockstat_nsecs(&rw->lock_object); 621 #endif 622 MPASS(owner == rw_owner(rw)); 623 turnstile_wait(ts, owner, TS_SHARED_QUEUE); 624 #ifdef KDTRACE_HOOKS 625 sleep_time += lockstat_nsecs(&rw->lock_object); 626 sleep_cnt++; 627 #endif 628 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 629 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 630 __func__, rw); 631 v = RW_READ_VALUE(rw); 632 } 633 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 634 if (__predict_true(!doing_lockprof)) 635 return; 636 #endif 637 #ifdef KDTRACE_HOOKS 638 all_time += lockstat_nsecs(&rw->lock_object); 639 if (sleep_time) 640 LOCKSTAT_RECORD4(rw__block, rw, sleep_time, 641 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, 642 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 643 644 /* Record only the loops spinning and not sleeping. */ 645 if (lda.spin_cnt > sleep_cnt) 646 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, 647 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, 648 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 649 out_lockstat: 650 #endif 651 /* 652 * TODO: acquire "owner of record" here. Here be turnstile dragons 653 * however. turnstiles don't like owners changing between calls to 654 * turnstile_wait() currently. 
655 */ 656 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, 657 waittime, file, line, LOCKSTAT_READER); 658 } 659 660 void 661 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 662 { 663 struct thread *td; 664 uintptr_t v; 665 666 td = curthread; 667 668 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) || 669 !TD_IS_IDLETHREAD(td), 670 ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", 671 td, rw->lock_object.lo_name, file, line)); 672 KASSERT(rw->rw_lock != RW_DESTROYED, 673 ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); 674 KASSERT(rw_wowner(rw) != td, 675 ("rw_rlock: wlock already held for %s @ %s:%d", 676 rw->lock_object.lo_name, file, line)); 677 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); 678 679 v = RW_READ_VALUE(rw); 680 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || 681 !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG))) 682 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG); 683 else 684 lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0, 685 file, line); 686 687 LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); 688 WITNESS_LOCK(&rw->lock_object, 0, file, line); 689 TD_LOCKS_INC(curthread); 690 } 691 692 void 693 __rw_rlock(volatile uintptr_t *c, const char *file, int line) 694 { 695 struct rwlock *rw; 696 697 rw = rwlock2rw(c); 698 __rw_rlock_int(rw LOCK_FILE_LINE_ARG); 699 } 700 701 int 702 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 703 { 704 uintptr_t x; 705 706 if (SCHEDULER_STOPPED()) 707 return (1); 708 709 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), 710 ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", 711 curthread, rw->lock_object.lo_name, file, line)); 712 713 x = rw->rw_lock; 714 for (;;) { 715 KASSERT(rw->rw_lock != RW_DESTROYED, 716 ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); 717 if (!(x & RW_LOCK_READ)) 718 break; 719 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) { 720 
LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, 721 line); 722 WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); 723 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, 724 rw, 0, 0, file, line, LOCKSTAT_READER); 725 TD_LOCKS_INC(curthread); 726 curthread->td_rw_rlocks++; 727 return (1); 728 } 729 } 730 731 LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); 732 return (0); 733 } 734 735 int 736 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) 737 { 738 struct rwlock *rw; 739 740 rw = rwlock2rw(c); 741 return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG)); 742 } 743 744 static bool __always_inline 745 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp) 746 { 747 748 for (;;) { 749 if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) { 750 if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp, 751 *vp - RW_ONE_READER)) { 752 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 753 CTR4(KTR_LOCK, 754 "%s: %p succeeded %p -> %p", 755 __func__, rw, (void *)*vp, 756 (void *)(*vp - RW_ONE_READER)); 757 td->td_rw_rlocks--; 758 return (true); 759 } 760 continue; 761 } 762 break; 763 } 764 return (false); 765 } 766 767 static void __noinline 768 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v 769 LOCK_FILE_LINE_ARG_DEF) 770 { 771 struct turnstile *ts; 772 uintptr_t setv, queue; 773 774 if (SCHEDULER_STOPPED()) 775 return; 776 777 if (__rw_runlock_try(rw, td, &v)) 778 goto out_lockstat; 779 780 /* 781 * Ok, we know we have waiters and we think we are the 782 * last reader, so grab the turnstile lock. 783 */ 784 turnstile_chain_lock(&rw->lock_object); 785 v = RW_READ_VALUE(rw); 786 for (;;) { 787 if (__rw_runlock_try(rw, td, &v)) 788 break; 789 790 MPASS(v & RW_LOCK_WAITERS); 791 792 /* 793 * Try to drop our lock leaving the lock in a unlocked 794 * state. 795 * 796 * If you wanted to do explicit lock handoff you'd have to 797 * do it here. 
You'd also want to use turnstile_signal() 798 * and you'd have to handle the race where a higher 799 * priority thread blocks on the write lock before the 800 * thread you wakeup actually runs and have the new thread 801 * "steal" the lock. For now it's a lot simpler to just 802 * wakeup all of the waiters. 803 * 804 * As above, if we fail, then another thread might have 805 * acquired a read lock, so drop the turnstile lock and 806 * restart. 807 */ 808 setv = RW_UNLOCKED; 809 queue = TS_SHARED_QUEUE; 810 if (v & RW_LOCK_WRITE_WAITERS) { 811 queue = TS_EXCLUSIVE_QUEUE; 812 setv |= (v & RW_LOCK_READ_WAITERS); 813 } 814 setv |= (v & RW_LOCK_WRITE_SPINNER); 815 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv)) 816 continue; 817 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 818 CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 819 __func__, rw); 820 821 /* 822 * Ok. The lock is released and all that's left is to 823 * wake up the waiters. Note that the lock might not be 824 * free anymore, but in that case the writers will just 825 * block again if they run before the new lock holder(s) 826 * release the lock. 
827 */ 828 ts = turnstile_lookup(&rw->lock_object); 829 MPASS(ts != NULL); 830 turnstile_broadcast(ts, queue); 831 turnstile_unpend(ts); 832 td->td_rw_rlocks--; 833 break; 834 } 835 turnstile_chain_unlock(&rw->lock_object); 836 out_lockstat: 837 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); 838 } 839 840 void 841 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 842 { 843 struct thread *td; 844 uintptr_t v; 845 846 KASSERT(rw->rw_lock != RW_DESTROYED, 847 ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); 848 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); 849 WITNESS_UNLOCK(&rw->lock_object, 0, file, line); 850 LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); 851 852 td = curthread; 853 v = RW_READ_VALUE(rw); 854 855 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || 856 !__rw_runlock_try(rw, td, &v))) 857 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG); 858 else 859 lock_profile_release_lock(&rw->lock_object, false); 860 861 TD_LOCKS_DEC(curthread); 862 } 863 864 void 865 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) 866 { 867 struct rwlock *rw; 868 869 rw = rwlock2rw(c); 870 _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG); 871 } 872 873 #ifdef ADAPTIVE_RWLOCKS 874 static inline void 875 rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work) 876 { 877 878 if (v & RW_LOCK_WRITE_SPINNER) 879 return; 880 if (*in_critical) { 881 critical_exit(); 882 *in_critical = false; 883 (*extra_work)--; 884 } 885 } 886 #else 887 #define rw_drop_critical(v, in_critical, extra_work) do { } while (0) 888 #endif 889 890 /* 891 * This function is called when we are unable to obtain a write lock on the 892 * first try. This means that at least one other thread holds either a 893 * read or write lock. 
894 */ 895 void 896 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) 897 { 898 uintptr_t tid; 899 struct rwlock *rw; 900 struct turnstile *ts; 901 struct thread *owner; 902 #ifdef ADAPTIVE_RWLOCKS 903 int spintries = 0; 904 int i, n; 905 enum { READERS, WRITER } sleep_reason = READERS; 906 bool in_critical = false; 907 #endif 908 uintptr_t setv; 909 #ifdef LOCK_PROFILING 910 uint64_t waittime = 0; 911 int contested = 0; 912 #endif 913 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) 914 struct lock_delay_arg lda; 915 #endif 916 #ifdef KDTRACE_HOOKS 917 u_int sleep_cnt = 0; 918 int64_t sleep_time = 0; 919 int64_t all_time = 0; 920 #endif 921 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 922 uintptr_t state = 0; 923 int doing_lockprof = 0; 924 #endif 925 int extra_work = 0; 926 927 tid = (uintptr_t)curthread; 928 rw = rwlock2rw(c); 929 930 #ifdef KDTRACE_HOOKS 931 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { 932 while (v == RW_UNLOCKED) { 933 if (_rw_write_lock_fetch(rw, &v, tid)) 934 goto out_lockstat; 935 } 936 extra_work = 1; 937 doing_lockprof = 1; 938 all_time -= lockstat_nsecs(&rw->lock_object); 939 state = v; 940 } 941 #endif 942 #ifdef LOCK_PROFILING 943 extra_work = 1; 944 doing_lockprof = 1; 945 state = v; 946 #endif 947 948 if (SCHEDULER_STOPPED()) 949 return; 950 951 if (__predict_false(v == RW_UNLOCKED)) 952 v = RW_READ_VALUE(rw); 953 954 if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) { 955 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, 956 ("%s: recursing but non-recursive rw %s @ %s:%d\n", 957 __func__, rw->lock_object.lo_name, file, line)); 958 rw->rw_recurse++; 959 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 960 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 961 CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); 962 return; 963 } 964 965 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 966 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 967 rw->lock_object.lo_name, (void 
*)rw->rw_lock, file, line); 968 969 #if defined(ADAPTIVE_RWLOCKS) 970 lock_delay_arg_init(&lda, &rw_delay); 971 #elif defined(KDTRACE_HOOKS) 972 lock_delay_arg_init_noadapt(&lda); 973 #endif 974 975 #ifdef HWPMC_HOOKS 976 PMC_SOFT_CALL( , , lock, failed); 977 #endif 978 lock_profile_obtain_lock_failed(&rw->lock_object, false, 979 &contested, &waittime); 980 981 for (;;) { 982 if (v == RW_UNLOCKED) { 983 if (_rw_write_lock_fetch(rw, &v, tid)) 984 break; 985 continue; 986 } 987 #ifdef KDTRACE_HOOKS 988 lda.spin_cnt++; 989 #endif 990 991 #ifdef ADAPTIVE_RWLOCKS 992 if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) { 993 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) 994 break; 995 continue; 996 } 997 998 /* 999 * If the lock is write locked and the owner is 1000 * running on another CPU, spin until the owner stops 1001 * running or the state of the lock changes. 1002 */ 1003 if (!(v & RW_LOCK_READ)) { 1004 rw_drop_critical(v, &in_critical, &extra_work); 1005 sleep_reason = WRITER; 1006 owner = lv_rw_wowner(v); 1007 if (!TD_IS_RUNNING(owner)) 1008 goto ts; 1009 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1010 CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 1011 __func__, rw, owner); 1012 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 1013 "spinning", "lockname:\"%s\"", 1014 rw->lock_object.lo_name); 1015 do { 1016 lock_delay(&lda); 1017 v = RW_READ_VALUE(rw); 1018 owner = lv_rw_wowner(v); 1019 } while (owner != NULL && TD_IS_RUNNING(owner)); 1020 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 1021 "running"); 1022 continue; 1023 } else if (RW_READERS(v) > 0) { 1024 sleep_reason = READERS; 1025 if (spintries == rowner_retries) 1026 goto ts; 1027 if (!(v & RW_LOCK_WRITE_SPINNER)) { 1028 if (!in_critical) { 1029 critical_enter(); 1030 in_critical = true; 1031 extra_work++; 1032 } 1033 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 1034 v | RW_LOCK_WRITE_SPINNER)) { 1035 critical_exit(); 1036 in_critical = false; 1037 extra_work--; 1038 continue; 1039 } 1040 
} 1041 spintries++; 1042 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), 1043 "spinning", "lockname:\"%s\"", 1044 rw->lock_object.lo_name); 1045 n = RW_READERS(v); 1046 for (i = 0; i < rowner_loops; i += n) { 1047 lock_delay_spin(n); 1048 v = RW_READ_VALUE(rw); 1049 if (!(v & RW_LOCK_WRITE_SPINNER)) 1050 break; 1051 if (!(v & RW_LOCK_READ)) 1052 break; 1053 n = RW_READERS(v); 1054 if (n == 0) 1055 break; 1056 } 1057 #ifdef KDTRACE_HOOKS 1058 lda.spin_cnt += i; 1059 #endif 1060 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), 1061 "running"); 1062 if (i < rowner_loops) 1063 continue; 1064 } 1065 ts: 1066 #endif 1067 ts = turnstile_trywait(&rw->lock_object); 1068 v = RW_READ_VALUE(rw); 1069 retry_ts: 1070 owner = lv_rw_wowner(v); 1071 1072 #ifdef ADAPTIVE_RWLOCKS 1073 /* 1074 * The current lock owner might have started executing 1075 * on another CPU (or the lock could have changed 1076 * owners) while we were waiting on the turnstile 1077 * chain lock. If so, drop the turnstile lock and try 1078 * again. 1079 */ 1080 if (owner != NULL) { 1081 if (TD_IS_RUNNING(owner)) { 1082 turnstile_cancel(ts); 1083 rw_drop_critical(v, &in_critical, &extra_work); 1084 continue; 1085 } 1086 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) { 1087 turnstile_cancel(ts); 1088 rw_drop_critical(v, &in_critical, &extra_work); 1089 continue; 1090 } 1091 #endif 1092 /* 1093 * Check for the waiters flags about this rwlock. 1094 * If the lock was released, without maintain any pending 1095 * waiters queue, simply try to acquire it. 1096 * If a pending waiters queue is present, claim the lock 1097 * ownership and maintain the pending queue. 
1098 */ 1099 setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); 1100 if ((v & ~setv) == RW_UNLOCKED) { 1101 setv &= ~RW_LOCK_WRITE_SPINNER; 1102 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) { 1103 if (setv) 1104 turnstile_claim(ts); 1105 else 1106 turnstile_cancel(ts); 1107 break; 1108 } 1109 goto retry_ts; 1110 } 1111 1112 #ifdef ADAPTIVE_RWLOCKS 1113 if (in_critical) { 1114 if ((v & RW_LOCK_WRITE_SPINNER) || 1115 !((v & RW_LOCK_WRITE_WAITERS))) { 1116 setv = v & ~RW_LOCK_WRITE_SPINNER; 1117 setv |= RW_LOCK_WRITE_WAITERS; 1118 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv)) 1119 goto retry_ts; 1120 } 1121 critical_exit(); 1122 in_critical = false; 1123 extra_work--; 1124 } else { 1125 #endif 1126 /* 1127 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 1128 * set it. If we fail to set it, then loop back and try 1129 * again. 1130 */ 1131 if (!(v & RW_LOCK_WRITE_WAITERS)) { 1132 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, 1133 v | RW_LOCK_WRITE_WAITERS)) 1134 goto retry_ts; 1135 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1136 CTR2(KTR_LOCK, "%s: %p set write waiters flag", 1137 __func__, rw); 1138 } 1139 #ifdef ADAPTIVE_RWLOCKS 1140 } 1141 #endif 1142 /* 1143 * We were unable to acquire the lock and the write waiters 1144 * flag is set, so we must block on the turnstile. 
1145 */ 1146 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1147 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 1148 rw); 1149 #ifdef KDTRACE_HOOKS 1150 sleep_time -= lockstat_nsecs(&rw->lock_object); 1151 #endif 1152 MPASS(owner == rw_owner(rw)); 1153 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE); 1154 #ifdef KDTRACE_HOOKS 1155 sleep_time += lockstat_nsecs(&rw->lock_object); 1156 sleep_cnt++; 1157 #endif 1158 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1159 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 1160 __func__, rw); 1161 #ifdef ADAPTIVE_RWLOCKS 1162 spintries = 0; 1163 #endif 1164 v = RW_READ_VALUE(rw); 1165 } 1166 if (__predict_true(!extra_work)) 1167 return; 1168 #ifdef ADAPTIVE_RWLOCKS 1169 if (in_critical) 1170 critical_exit(); 1171 #endif 1172 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) 1173 if (__predict_true(!doing_lockprof)) 1174 return; 1175 #endif 1176 #ifdef KDTRACE_HOOKS 1177 all_time += lockstat_nsecs(&rw->lock_object); 1178 if (sleep_time) 1179 LOCKSTAT_RECORD4(rw__block, rw, sleep_time, 1180 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, 1181 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 1182 1183 /* Record only the loops spinning and not sleeping. */ 1184 if (lda.spin_cnt > sleep_cnt) 1185 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, 1186 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, 1187 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); 1188 out_lockstat: 1189 #endif 1190 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, 1191 waittime, file, line, LOCKSTAT_WRITER); 1192 } 1193 1194 /* 1195 * This function is called if lockstat is active or the first try at releasing 1196 * a write lock failed. The latter means that the lock is recursed or one of 1197 * the 2 waiter bits must be set indicating that at least one thread is waiting 1198 * on this lock. 
1199 */ 1200 void 1201 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) 1202 { 1203 struct rwlock *rw; 1204 struct turnstile *ts; 1205 uintptr_t tid, setv; 1206 int queue; 1207 1208 tid = (uintptr_t)curthread; 1209 if (SCHEDULER_STOPPED()) 1210 return; 1211 1212 rw = rwlock2rw(c); 1213 if (__predict_false(v == tid)) 1214 v = RW_READ_VALUE(rw); 1215 1216 if (v & RW_LOCK_WRITER_RECURSED) { 1217 if (--(rw->rw_recurse) == 0) 1218 atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); 1219 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1220 CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); 1221 return; 1222 } 1223 1224 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER); 1225 if (v == tid && _rw_write_unlock(rw, tid)) 1226 return; 1227 1228 KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 1229 ("%s: neither of the waiter flags are set", __func__)); 1230 1231 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1232 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 1233 1234 turnstile_chain_lock(&rw->lock_object); 1235 1236 /* 1237 * Use the same algo as sx locks for now. Prefer waking up shared 1238 * waiters if we have any over writers. This is probably not ideal. 1239 * 1240 * 'v' is the value we are going to write back to rw_lock. If we 1241 * have waiters on both queues, we need to preserve the state of 1242 * the waiter flag for the queue we don't wake up. For now this is 1243 * hardcoded for the algorithm mentioned above. 1244 * 1245 * In the case of both readers and writers waiting we wakeup the 1246 * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a 1247 * new writer comes in before a reader it will claim the lock up 1248 * above. There is probably a potential priority inversion in 1249 * there that could be worked around either by waking both queues 1250 * of waiters or doing some complicated lock handoff gymnastics. 
1251 */ 1252 setv = RW_UNLOCKED; 1253 v = RW_READ_VALUE(rw); 1254 queue = TS_SHARED_QUEUE; 1255 if (v & RW_LOCK_WRITE_WAITERS) { 1256 queue = TS_EXCLUSIVE_QUEUE; 1257 setv |= (v & RW_LOCK_READ_WAITERS); 1258 } 1259 atomic_store_rel_ptr(&rw->rw_lock, setv); 1260 1261 /* Wake up all waiters for the specific queue. */ 1262 if (LOCK_LOG_TEST(&rw->lock_object, 0)) 1263 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 1264 queue == TS_SHARED_QUEUE ? "read" : "write"); 1265 1266 ts = turnstile_lookup(&rw->lock_object); 1267 MPASS(ts != NULL); 1268 turnstile_broadcast(ts, queue); 1269 turnstile_unpend(ts); 1270 turnstile_chain_unlock(&rw->lock_object); 1271 } 1272 1273 /* 1274 * Attempt to do a non-blocking upgrade from a read lock to a write 1275 * lock. This will only succeed if this thread holds a single read 1276 * lock. Returns true if the upgrade succeeded and false otherwise. 1277 */ 1278 int 1279 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 1280 { 1281 uintptr_t v, setv, tid; 1282 struct turnstile *ts; 1283 int success; 1284 1285 if (SCHEDULER_STOPPED()) 1286 return (1); 1287 1288 KASSERT(rw->rw_lock != RW_DESTROYED, 1289 ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); 1290 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); 1291 1292 /* 1293 * Attempt to switch from one reader to a writer. If there 1294 * are any write waiters, then we will have to lock the 1295 * turnstile first to prevent races with another writer 1296 * calling turnstile_wait() before we have claimed this 1297 * turnstile. So, do the simple case of no waiters first. 1298 */ 1299 tid = (uintptr_t)curthread; 1300 success = 0; 1301 v = RW_READ_VALUE(rw); 1302 for (;;) { 1303 if (RW_READERS(v) > 1) 1304 break; 1305 if (!(v & RW_LOCK_WAITERS)) { 1306 success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid); 1307 if (!success) 1308 continue; 1309 break; 1310 } 1311 1312 /* 1313 * Ok, we think we have waiters, so lock the turnstile. 
1314 */ 1315 ts = turnstile_trywait(&rw->lock_object); 1316 v = RW_READ_VALUE(rw); 1317 retry_ts: 1318 if (RW_READERS(v) > 1) { 1319 turnstile_cancel(ts); 1320 break; 1321 } 1322 /* 1323 * Try to switch from one reader to a writer again. This time 1324 * we honor the current state of the waiters flags. 1325 * If we obtain the lock with the flags set, then claim 1326 * ownership of the turnstile. 1327 */ 1328 setv = tid | (v & RW_LOCK_WAITERS); 1329 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv); 1330 if (success) { 1331 if (v & RW_LOCK_WAITERS) 1332 turnstile_claim(ts); 1333 else 1334 turnstile_cancel(ts); 1335 break; 1336 } 1337 goto retry_ts; 1338 } 1339 LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); 1340 if (success) { 1341 curthread->td_rw_rlocks--; 1342 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 1343 file, line); 1344 LOCKSTAT_RECORD0(rw__upgrade, rw); 1345 } 1346 return (success); 1347 } 1348 1349 int 1350 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) 1351 { 1352 struct rwlock *rw; 1353 1354 rw = rwlock2rw(c); 1355 return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG)); 1356 } 1357 1358 /* 1359 * Downgrade a write lock into a single read lock. 1360 */ 1361 void 1362 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) 1363 { 1364 struct turnstile *ts; 1365 uintptr_t tid, v; 1366 int rwait, wwait; 1367 1368 if (SCHEDULER_STOPPED()) 1369 return; 1370 1371 KASSERT(rw->rw_lock != RW_DESTROYED, 1372 ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); 1373 __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line); 1374 #ifndef INVARIANTS 1375 if (rw_recursed(rw)) 1376 panic("downgrade of a recursed lock"); 1377 #endif 1378 1379 WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); 1380 1381 /* 1382 * Convert from a writer to a single reader. First we handle 1383 * the easy case with no waiters. If there are any waiters, we 1384 * lock the turnstile and "disown" the lock. 
1385 */ 1386 tid = (uintptr_t)curthread; 1387 if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) 1388 goto out; 1389 1390 /* 1391 * Ok, we think we have waiters, so lock the turnstile so we can 1392 * read the waiter flags without any races. 1393 */ 1394 turnstile_chain_lock(&rw->lock_object); 1395 v = rw->rw_lock & RW_LOCK_WAITERS; 1396 rwait = v & RW_LOCK_READ_WAITERS; 1397 wwait = v & RW_LOCK_WRITE_WAITERS; 1398 MPASS(rwait | wwait); 1399 1400 /* 1401 * Downgrade from a write lock while preserving waiters flag 1402 * and give up ownership of the turnstile. 1403 */ 1404 ts = turnstile_lookup(&rw->lock_object); 1405 MPASS(ts != NULL); 1406 if (!wwait) 1407 v &= ~RW_LOCK_READ_WAITERS; 1408 atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); 1409 /* 1410 * Wake other readers if there are no writers pending. Otherwise they 1411 * won't be able to acquire the lock anyway. 1412 */ 1413 if (rwait && !wwait) { 1414 turnstile_broadcast(ts, TS_SHARED_QUEUE); 1415 turnstile_unpend(ts); 1416 } else 1417 turnstile_disown(ts); 1418 turnstile_chain_unlock(&rw->lock_object); 1419 out: 1420 curthread->td_rw_rlocks++; 1421 LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); 1422 LOCKSTAT_RECORD0(rw__downgrade, rw); 1423 } 1424 1425 void 1426 __rw_downgrade(volatile uintptr_t *c, const char *file, int line) 1427 { 1428 struct rwlock *rw; 1429 1430 rw = rwlock2rw(c); 1431 __rw_downgrade_int(rw LOCK_FILE_LINE_ARG); 1432 } 1433 1434 #ifdef INVARIANT_SUPPORT 1435 #ifndef INVARIANTS 1436 #undef __rw_assert 1437 #endif 1438 1439 /* 1440 * In the non-WITNESS case, rw_assert() can only detect that at least 1441 * *some* thread owns an rlock, but it cannot guarantee that *this* 1442 * thread owns an rlock. 
1443 */ 1444 void 1445 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) 1446 { 1447 const struct rwlock *rw; 1448 1449 if (SCHEDULER_STOPPED()) 1450 return; 1451 1452 rw = rwlock2rw(c); 1453 1454 switch (what) { 1455 case RA_LOCKED: 1456 case RA_LOCKED | RA_RECURSED: 1457 case RA_LOCKED | RA_NOTRECURSED: 1458 case RA_RLOCKED: 1459 case RA_RLOCKED | RA_RECURSED: 1460 case RA_RLOCKED | RA_NOTRECURSED: 1461 #ifdef WITNESS 1462 witness_assert(&rw->lock_object, what, file, line); 1463 #else 1464 /* 1465 * If some other thread has a write lock or we have one 1466 * and are asserting a read lock, fail. Also, if no one 1467 * has a lock at all, fail. 1468 */ 1469 if (rw->rw_lock == RW_UNLOCKED || 1470 (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || 1471 rw_wowner(rw) != curthread))) 1472 panic("Lock %s not %slocked @ %s:%d\n", 1473 rw->lock_object.lo_name, (what & RA_RLOCKED) ? 1474 "read " : "", file, line); 1475 1476 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { 1477 if (rw_recursed(rw)) { 1478 if (what & RA_NOTRECURSED) 1479 panic("Lock %s recursed @ %s:%d\n", 1480 rw->lock_object.lo_name, file, 1481 line); 1482 } else if (what & RA_RECURSED) 1483 panic("Lock %s not recursed @ %s:%d\n", 1484 rw->lock_object.lo_name, file, line); 1485 } 1486 #endif 1487 break; 1488 case RA_WLOCKED: 1489 case RA_WLOCKED | RA_RECURSED: 1490 case RA_WLOCKED | RA_NOTRECURSED: 1491 if (rw_wowner(rw) != curthread) 1492 panic("Lock %s not exclusively locked @ %s:%d\n", 1493 rw->lock_object.lo_name, file, line); 1494 if (rw_recursed(rw)) { 1495 if (what & RA_NOTRECURSED) 1496 panic("Lock %s recursed @ %s:%d\n", 1497 rw->lock_object.lo_name, file, line); 1498 } else if (what & RA_RECURSED) 1499 panic("Lock %s not recursed @ %s:%d\n", 1500 rw->lock_object.lo_name, file, line); 1501 break; 1502 case RA_UNLOCKED: 1503 #ifdef WITNESS 1504 witness_assert(&rw->lock_object, what, file, line); 1505 #else 1506 /* 1507 * If we hold a write lock fail. 
We can't reliably check 1508 * to see if we hold a read lock or not. 1509 */ 1510 if (rw_wowner(rw) == curthread) 1511 panic("Lock %s exclusively locked @ %s:%d\n", 1512 rw->lock_object.lo_name, file, line); 1513 #endif 1514 break; 1515 default: 1516 panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 1517 line); 1518 } 1519 } 1520 #endif /* INVARIANT_SUPPORT */ 1521 1522 #ifdef DDB 1523 void 1524 db_show_rwlock(const struct lock_object *lock) 1525 { 1526 const struct rwlock *rw; 1527 struct thread *td; 1528 1529 rw = (const struct rwlock *)lock; 1530 1531 db_printf(" state: "); 1532 if (rw->rw_lock == RW_UNLOCKED) 1533 db_printf("UNLOCKED\n"); 1534 else if (rw->rw_lock == RW_DESTROYED) { 1535 db_printf("DESTROYED\n"); 1536 return; 1537 } else if (rw->rw_lock & RW_LOCK_READ) 1538 db_printf("RLOCK: %ju locks\n", 1539 (uintmax_t)(RW_READERS(rw->rw_lock))); 1540 else { 1541 td = rw_wowner(rw); 1542 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 1543 td->td_tid, td->td_proc->p_pid, td->td_name); 1544 if (rw_recursed(rw)) 1545 db_printf(" recursed: %u\n", rw->rw_recurse); 1546 } 1547 db_printf(" waiters: "); 1548 switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 1549 case RW_LOCK_READ_WAITERS: 1550 db_printf("readers\n"); 1551 break; 1552 case RW_LOCK_WRITE_WAITERS: 1553 db_printf("writers\n"); 1554 break; 1555 case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 1556 db_printf("readers and writers\n"); 1557 break; 1558 default: 1559 db_printf("none\n"); 1560 break; 1561 } 1562 } 1563 1564 #endif 1565