/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
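/*
 * Illustrative note (editor's sketch, not upstream text): the public rw_*()
 * interfaces pass around the address of the rw_lock word ("the lock cookie")
 * rather than the lock itself, and rwlock2rw() recovers the containing
 * structure from it, e.g.:
 *
 *	struct rwlock foo_lock;
 *	volatile uintptr_t *c = &foo_lock.rw_lock;
 *
 *	MPASS(rwlock2rw(c) == &foo_lock);
 *
 * This only holds because rw_lock is a member of struct rwlock, which is
 * exactly what __containerof() relies on.
 */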
#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
static int	trylock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_trylock = trylock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

#ifdef ADAPTIVE_RWLOCKS
#ifdef RWLOCK_CUSTOM_BACKOFF
static u_short __read_frequently rowner_retries;
static u_short __read_frequently rowner_loops;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "rwlock debugging");
SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");

static struct lock_delay_config __read_frequently rw_delay;

SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
    0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
    0, "");

static void
rw_lock_delay_init(void *arg __unused)
{

	lock_delay_default_init(&rw_delay);
	rowner_retries = 10;
	rowner_loops = max(10000, rw_delay.max);
}
LOCK_DELAY_SYSINIT(rw_lock_delay_init);
#else
#define	rw_delay	locks_delay
#define	rowner_retries	locks_delay_retries
#define	rowner_loops	locks_delay_loops
#endif
#endif

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */

#define	lv_rw_wowner(v)							\
	((v) & RW_LOCK_READ ? NULL :					\
	 (struct thread *)RW_OWNER((v)))

#define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))

/*
 * Returns true if the write owner is recursed.  Write ownership is not
 * assured here and should be previously checked.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
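/*
 * Editor's sketch of the lock word decoding assumed by the macros above
 * (see sys/rwlock.h for the authoritative encoding): when RW_LOCK_READ is
 * set the lock is unlocked or read-locked and there is no single owning
 * thread, so lv_rw_wowner() returns NULL; when it is clear the word holds
 * the owning thread pointer plus low flag bits, which RW_OWNER() masks off.
 * For example:
 *
 *	v = RW_UNLOCKED;                            lv_rw_wowner(v) == NULL
 *	v = RW_READERS_LOCK(2);                     lv_rw_wowner(v) == NULL
 *	v = (uintptr_t)td | RW_LOCK_WRITE_WAITERS;  lv_rw_wowner(v) == td
 */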
/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

static void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

static void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

static int
trylock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		return (rw_try_rlock(rw));
	else
		return (rw_try_wlock(rw));
}

static uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
static int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE | RW_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	if (opts & RW_NEW)
		flags |= LO_NEW;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args;

	args = arg;
	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}
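/*
 * Editor's sketch of typical consumer usage of the rwlock(9) KPI backed by
 * the routines above (illustrative only; the "foo" names are made up):
 *
 *	static struct rwlock foo_lock;
 *
 *	rw_init(&foo_lock, "foo data");
 *
 *	rw_rlock(&foo_lock);		// shared: many readers at once
 *	value = foo_lookup(key);
 *	rw_runlock(&foo_lock);
 *
 *	rw_wlock(&foo_lock);		// exclusive: single writer
 *	foo_insert(key, value);
 *	rw_wunlock(&foo_lock);
 *
 *	rw_destroy(&foo_lock);		// must be unlocked at this point
 */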
void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t tid, v;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	v = RW_UNLOCKED;
	if (!_rw_write_lock_fetch(rw, &v, tid))
		_rw_wlock_hard(rw, v, file, line);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
		    0, 0, file, line, LOCKSTAT_WRITER);

	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}

int
__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t tid, v;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	rval = 1;
	recursed = false;
	v = RW_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
			break;
		if (v == RW_UNLOCKED)
			continue;
		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
			rw->rw_recurse++;
			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}
	return (rval);
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);

#ifdef LOCK_PROFILING
	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
#else
	__rw_wunlock(rw, curthread, file, line);
#endif

	TD_LOCKS_DEC(curthread);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read, to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked or read-locked and has no write waiters or spinners.
 * Failing in all other cases gives writers priority over readers.
 */
static __always_inline bool
__rw_can_read(struct thread *td, uintptr_t v, bool fp)
{

	if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
	    == RW_LOCK_READ)
		return (true);
	if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
		return (true);
	return (false);
}
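/*
 * Editor's note with a few worked lock-word values for __rw_can_read()
 * (encodings per sys/rwlock.h; illustrative only):
 *
 *	v = RW_UNLOCKED                                 -> true
 *	v = RW_READERS_LOCK(3)                          -> true
 *	v = RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS  -> false on the fast
 *	    path (fp == true), but true when fp == false and the thread
 *	    already holds read locks (td_rw_rlocks != 0), so recursive read
 *	    acquisitions cannot deadlock behind a queued writer.
 *	v = (uintptr_t)owner_td (write-locked)          -> false
 */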
static __always_inline bool
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
    LOCK_FILE_LINE_ARG_DEF)
{

	/*
	 * Handle the easy case.  If no other thread has a write
	 * lock, then try to bump up the count of read locks.  Note
	 * that we have to preserve the current state of the
	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
	 * read lock, then rw_lock must have changed, so restart
	 * the loop.  Note that this handles the case of a
	 * completely unlocked rwlock since such a lock is encoded
	 * as a read lock with no waiters.
	 */
	while (__rw_can_read(td, *vp, fp)) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
		    *vp + RW_ONE_READER)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR4(KTR_LOCK,
				    "%s: %p succeed %p -> %p", __func__,
				    rw, (void *)*vp,
				    (void *)(*vp + RW_ONE_READER));
			td->td_rw_rlocks++;
			return (true);
		}
	}
	return (false);
}
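/*
 * Editor's note on the retry idiom above (illustrative): unlike
 * atomic_cmpset*, atomic_fcmpset_acq_ptr() reloads the current lock word
 * into *vp when the compare-and-set fails, so the loop re-evaluates
 * __rw_can_read() against fresh state without an extra explicit read.
 * The reader count itself lives in the upper bits of the word, which is
 * why taking one more shared hold is a plain "*vp + RW_ONE_READER"
 * addition rather than a flag manipulation.
 */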
static void __noinline
__rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
	uintptr_t state = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	int doing_lockprof = 0;
#endif

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			goto out_lockstat;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
	}
	state = v;
#endif
#ifdef LOCK_PROFILING
	doing_lockprof = 1;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				do {
					lock_delay(&lda);
					v = RW_READ_VALUE(rw);
					owner = lv_rw_wowner(v);
				} while (owner != NULL && TD_IS_RUNNING(owner));
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else {
			if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
				MPASS(!__rw_can_read(td, v, false));
				lock_delay_spin(2);
				v = RW_READ_VALUE(rw);
				continue;
			}
			if (spintries < rowner_retries) {
				spintries++;
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"",
				    rw->lock_object.lo_name);
				n = RW_READERS(v);
				for (i = 0; i < rowner_loops; i += n) {
					lock_delay_spin(n);
					v = RW_READ_VALUE(rw);
					if (!(v & RW_LOCK_READ))
						break;
					n = RW_READERS(v);
					if (n == 0)
						break;
					if (__rw_can_read(td, v, false))
						break;
				}
#ifdef KDTRACE_HOOKS
				lda.spin_cnt += rowner_loops - i;
#endif
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				if (i < rowner_loops)
					continue;
			}
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = RW_READ_VALUE(rw);
retry_ts:
		if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
		    __rw_can_read(td, v, false)) {
			turnstile_cancel(ts);
			continue;
		}

		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!__rw_can_read(td, v, false));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_READ_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_READER);
}

void
__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	td = curthread;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(td),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    td, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != td,
	    ("rw_rlock: wlock already held for %s @ %s:%d",
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	v = RW_READ_VALUE(rw);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
	    !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0,
		    file, line);

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	TD_LOCKS_INC(curthread);
}

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
}
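/*
 * Editor's usage note (illustrative; "foo" names are made up): the KASSERT
 * in __rw_rlock_int() above means a thread must not read-lock an rwlock it
 * already owns for writing; unlike read-after-read recursion this is
 * always an error, e.g.:
 *
 *	rw_wlock(&foo_lock);
 *	rw_rlock(&foo_lock);	// asserts under INVARIANTS
 *
 * Code that may already hold the write lock should check rw_wowned()
 * first or restructure so the lock mode is known at the call site.
 */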
int
__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	x = rw->rw_lock;
	for (;;) {
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file,
			    line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
}
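/*
 * Editor's sketch of the non-blocking acquisition pattern served by
 * rw_try_rlock()/rw_try_wlock() (illustrative; "foo" names are made up).
 * The try variants never sleep and simply report failure, which makes
 * them suitable in contexts that cannot block or that must avoid a lock
 * order reversal:
 *
 *	if (rw_try_rlock(&foo_lock)) {
 *		value = foo_lookup(key);
 *		rw_runlock(&foo_lock);
 *	} else {
 *		// fall back: defer the work or take the lock elsewhere
 *	}
 */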
static __always_inline bool
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
{

	for (;;) {
		if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    *vp - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)*vp,
					    (void *)(*vp - RW_ONE_READER));
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
__rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t setv, passedv, queue;

	if (SCHEDULER_STOPPED())
		return;

	passedv = v;
	if (__rw_runlock_try(rw, td, &v))
		goto out_lockstat;

	/*
	 * Ok, we know we have waiters and we think we are the
	 * last reader, so grab the turnstile lock.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (__rw_runlock_try(rw, td, &v))
			break;

		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wakeup actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wakeup all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		setv = RW_UNLOCKED;
		queue = TS_SHARED_QUEUE;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			setv |= (v & RW_LOCK_READ_WAITERS);
		}
		setv |= (v & RW_LOCK_WRITE_SPINNER);
		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
			continue;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		if (__predict_false(ts == NULL)) {
			panic("got NULL turnstile on rwlock %p passedv %p v %p",
			    rw, (void *)passedv, (void *)v);
		}
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts);
		td->td_rw_rlocks--;
		break;
	}
	turnstile_chain_unlock(&rw->lock_object);
out_lockstat:
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
}
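/*
 * Editor's walk-through of the transition performed above (illustrative
 * values): suppose the last reader releases while a writer is queued,
 * i.e. v == RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS.  The fcmpset
 * stores setv == RW_UNLOCKED, the waiters on the exclusive turnstile
 * queue are broadcast, and one of the woken writers then competes for
 * the now-free lock.  If both waiter bits were set, RW_LOCK_READ_WAITERS
 * would be preserved in setv so the still-queued readers are not lost.
 */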
void
_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	td = curthread;
	v = RW_READ_VALUE(rw);

	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
	    !__rw_runlock_try(rw, td, &v)))
		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_release_lock(&rw->lock_object, false);

	TD_LOCKS_DEC(curthread);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
}

#ifdef ADAPTIVE_RWLOCKS
static inline void
rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
{

	if (v & RW_LOCK_WRITE_SPINNER)
		return;
	if (*in_critical) {
		critical_exit();
		*in_critical = false;
		(*extra_work)--;
	}
}
#else
#define	rw_drop_critical(v, in_critical, extra_work) do { } while (0)
#endif

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t tid;
	struct rwlock *rw;
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
	enum { READERS, WRITER } sleep_reason = READERS;
	bool in_critical = false;
#endif
	uintptr_t setv;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
	uintptr_t state = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	int doing_lockprof = 0;
#endif
	int extra_work = 0;

	tid = (uintptr_t)curthread;
	rw = rwlock2rw(c);

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		while (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				goto out_lockstat;
		}
		extra_work = 1;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
	}
	state = v;
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	doing_lockprof = 1;
#endif

	if (SCHEDULER_STOPPED())
		return;

	if (__predict_false(v == RW_UNLOCKED))
		v = RW_READ_VALUE(rw);

	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				break;
			continue;
		}
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
				break;
			continue;
		}

		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if (!(v & RW_LOCK_READ)) {
			rw_drop_critical(v, &in_critical, &extra_work);
			sleep_reason = WRITER;
			owner = lv_rw_wowner(v);
			if (!TD_IS_RUNNING(owner))
				goto ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			do {
				lock_delay(&lda);
				v = RW_READ_VALUE(rw);
				owner = lv_rw_wowner(v);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (RW_READERS(v) > 0) {
			sleep_reason = READERS;
			if (spintries == rowner_retries)
				goto ts;
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!in_critical) {
					critical_enter();
					in_critical = true;
					extra_work++;
				}
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					critical_exit();
					in_critical = false;
					extra_work--;
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			n = RW_READERS(v);
			for (i = 0; i < rowner_loops; i += n) {
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if (!(v & RW_LOCK_WRITE_SPINNER))
					break;
				if (!(v & RW_LOCK_READ))
					break;
				n = RW_READERS(v);
				if (n == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < rowner_loops)
				continue;
		}
ts:
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				rw_drop_critical(v, &in_critical, &extra_work);
				continue;
			}
		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
			turnstile_cancel(ts);
			rw_drop_critical(v, &in_critical, &extra_work);
			continue;
		}
#endif
		/*
		 * Check the waiter flags on this rwlock.  If the lock was
		 * released without leaving any pending waiters queue, simply
		 * try to acquire it.  If a pending waiters queue is present,
		 * claim the lock ownership and maintain the pending queue.
		 */
		setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~setv) == RW_UNLOCKED) {
			setv &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
				if (setv)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			goto retry_ts;
		}

#ifdef ADAPTIVE_RWLOCKS
		if (in_critical) {
			if ((v & RW_LOCK_WRITE_SPINNER) ||
			    !((v & RW_LOCK_WRITE_WAITERS))) {
				setv = v & ~RW_LOCK_WRITE_SPINNER;
				setv |= RW_LOCK_WRITE_WAITERS;
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
					goto retry_ts;
			}
			critical_exit();
			in_critical = false;
			extra_work--;
		} else {
#endif
			/*
			 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
			 * set it.  If we fail to set it, then loop back and try
			 * again.
			 */
			if (!(v & RW_LOCK_WRITE_WAITERS)) {
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_WAITERS))
					goto retry_ts;
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p set write waiters flag",
					    __func__, rw);
			}
#ifdef ADAPTIVE_RWLOCKS
		}
#endif
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
	if (__predict_true(!extra_work))
		return;
#ifdef ADAPTIVE_RWLOCKS
	if (in_critical)
		critical_exit();
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
}
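/*
 * Editor's walk-through of the "claim the lock ownership and maintain the
 * pending queue" case above (illustrative values): if the lock word is
 * v == RW_UNLOCKED | RW_LOCK_READ_WAITERS when we get here, then
 * setv == RW_LOCK_READ_WAITERS, the fcmpset installs tid | setv, and
 * turnstile_claim() makes this thread the owner of record so the queued
 * readers keep lending it their priority.  With no waiter bits set the
 * turnstile is simply cancelled and the acquisition is an ordinary
 * uncontested write lock.
 */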
/*
 * This function is called if lockstat is active or the first try at releasing
 * a write lock failed.  The latter means that the lock is recursed or one of
 * the two waiter bits is set, indicating that at least one thread is waiting
 * on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, setv, passedv;
	int queue;

	tid = (uintptr_t)curthread;
	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);
	if (__predict_false(v == tid))
		v = RW_READ_VALUE(rw);

	if (v & RW_LOCK_WRITER_RECURSED) {
		if (--(rw->rw_recurse) == 0)
			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
	if (v == tid && _rw_write_unlock(rw, tid))
		return;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up the
	 * write waiters if we have any; otherwise wake up the readers.
	 * This is probably not ideal.
	 *
	 * 'setv' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * writers but leave the RW_LOCK_READ_WAITERS flag set, so the
	 * queued readers are found again when the lock is next released.
	 * A new writer that sneaks in before the woken writers will claim
	 * the lock up above while preserving that flag.  There is probably
	 * a potential priority inversion in there that could be worked
	 * around either by waking both queues of waiters or doing some
	 * complicated lock handoff gymnastics.
	 */
	setv = RW_UNLOCKED;
	passedv = v;
	v = RW_READ_VALUE(rw);
	queue = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	atomic_store_rel_ptr(&rw->rw_lock, setv);

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	ts = turnstile_lookup(&rw->lock_object);
	if (__predict_false(ts == NULL)) {
		panic("got NULL turnstile on rwlock %p passedv %p v %p", rw,
		    (void *)passedv, (void *)v);
	}
	turnstile_broadcast(ts, queue);
	turnstile_unpend(ts);
	turnstile_chain_unlock(&rw->lock_object);
}
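/*
 * Editor's walk-through of the release path above (illustrative values):
 * with rw_lock == tid | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS the
 * store publishes setv == RW_UNLOCKED | RW_LOCK_READ_WAITERS and the
 * exclusive turnstile queue is broadcast, so the writers race for the
 * lock while the read-waiters bit keeps the sleeping readers visible to
 * whichever thread wins and later releases it.
 */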
/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t v, setv, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		setv = tid | (v & RW_LOCK_WAITERS);
		success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
		if (success) {
			if (v & RW_LOCK_WAITERS)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		goto retry_ts;
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(rw__upgrade, rw);
	}
	return (success);
}

int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
}
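/*
 * Editor's sketch of the usual try-upgrade idiom (illustrative; "foo"
 * names are made up).  Because the upgrade can fail whenever other
 * readers are present, callers must be prepared to drop the read lock,
 * take the write lock, and revalidate whatever they looked up:
 *
 *	rw_rlock(&foo_lock);
 *	obj = foo_lookup(key);
 *	if (obj != NULL && obj->dirty) {
 *		if (!rw_try_upgrade(&foo_lock)) {
 *			rw_runlock(&foo_lock);
 *			rw_wlock(&foo_lock);
 *			obj = foo_lookup(key);	// state may have changed
 *		}
 *		if (obj != NULL)
 *			foo_clean(obj);
 *		rw_wunlock(&foo_lock);
 *	} else
 *		rw_runlock(&foo_lock);
 */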
/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(rw__downgrade, rw);
}

void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
}
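/*
 * Editor's sketch of a common downgrade idiom (illustrative; "foo" names
 * are made up).  Downgrading lets a writer publish an update and then
 * keep reading the structure without ever releasing the lock, so the
 * data it just wrote cannot be torn down underneath it:
 *
 *	rw_wlock(&foo_lock);
 *	foo_insert(key, obj);
 *	rw_downgrade(&foo_lock);	// now held shared, never unlocked
 *	foo_report(obj);		// read-only work alongside readers
 *	rw_runlock(&foo_lock);
 */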
#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
static void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}
#endif