/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
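/*
 * Illustrative sketch (editor's addition, not part of the upstream file):
 * the rwlock(9) wrapper macros in sys/rwlock.h are believed to pass
 * &rw->rw_lock as the opaque cookie, so rwlock2rw() simply recovers the
 * enclosing lock.  The helper name below is hypothetical.
 */
#if 0
static struct rwlock *
example_cookie_to_rw(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);		/* c is assumed to be &rw->rw_lock */
	MPASS(&rw->rw_lock == c);
	return (rw);
}
#endif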
#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
static int	trylock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_trylock = trylock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

#ifdef ADAPTIVE_RWLOCKS
#ifdef RWLOCK_CUSTOM_BACKOFF
static u_short __read_frequently rowner_retries;
static u_short __read_frequently rowner_loops;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "rwlock debugging");
SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");

static struct lock_delay_config __read_frequently rw_delay;

SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
    0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
    0, "");

static void
rw_lock_delay_init(void *arg __unused)
{

	lock_delay_default_init(&rw_delay);
	rowner_retries = 10;
	rowner_loops = max(10000, rw_delay.max);
}
LOCK_DELAY_SYSINIT(rw_lock_delay_init);
#else
#define	rw_delay	locks_delay
#define	rowner_retries	locks_delay_retries
#define	rowner_loops	locks_delay_loops
#endif
#endif

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */

#define	lv_rw_wowner(v)							\
	((v) & RW_LOCK_READ ? NULL :					\
	 (struct thread *)RW_OWNER((v)))

#define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))

/*
 * Returns true if the write owner is recursed.  Write ownership is not
 * verified here and should be checked beforehand by the caller.
 */
#define	rw_recursed(rw)	((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)	(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)	rw_wowner(rw)
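/*
 * Basic usage sketch (editor's addition, illustrative only) of the public
 * rwlock(9) KPI wrapping this file.  "example_lock" and "example_data"
 * are hypothetical names.
 */
#if 0
static struct rwlock example_lock;
static int example_data;

static void
example_init(void)
{

	rw_init(&example_lock, "example");
}

static int
example_read(void)
{
	int v;

	rw_rlock(&example_lock);	/* shared (read) lock */
	v = example_data;
	rw_runlock(&example_lock);
	return (v);
}

static void
example_write(int v)
{

	rw_wlock(&example_lock);	/* exclusive (write) lock */
	example_data = v;
	rw_wunlock(&example_lock);
}
#endif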
#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

static void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

static void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

static int
trylock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		return (rw_try_rlock(rw));
	else
		return (rw_try_wlock(rw));
}

static uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
static int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE | RW_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	if (opts & RW_NEW)
		flags |= LO_NEW;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args;

	args = arg;
	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t tid, v;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	v = RW_UNLOCKED;
	if (!_rw_write_lock_fetch(rw, &v, tid))
		_rw_wlock_hard(rw, v, file, line);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
		    0, 0, file, line, LOCKSTAT_WRITER);

	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}
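/*
 * Editor's sketch of the lock word states implied by the code above and
 * below (illustrative, not authoritative): an unlocked rwlock is encoded
 * as a read lock with zero readers, a read-locked word carries the reader
 * count, and a write-locked word holds the owner thread pointer plus flag
 * bits.  The helper below is hypothetical.
 */
#if 0
static void
example_describe_lock_word(struct rwlock *rw)
{
	uintptr_t v;

	v = RW_READ_VALUE(rw);
	if (v == RW_UNLOCKED)
		printf("unlocked (read lock with no readers)\n");
	else if (v & RW_LOCK_READ)
		printf("read-locked, %ju reader(s)\n",
		    (uintmax_t)RW_READERS(v));
	else
		printf("write-locked by thread %p\n", (void *)RW_OWNER(v));
}
#endif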
int
__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t tid, v;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	rval = 1;
	recursed = false;
	v = RW_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
			break;
		if (v == RW_UNLOCKED)
			continue;
		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
			rw->rw_recurse++;
			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}
	return (rval);
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);

#ifdef LOCK_PROFILING
	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
#else
	__rw_wunlock(rw, curthread, file, line);
#endif

	TD_LOCKS_DEC(curthread);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read, to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Otherwise it fails,
 * which prioritizes writers over readers.
 */
static __always_inline bool
__rw_can_read(struct thread *td, uintptr_t v, bool fp)
{

	if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
	    == RW_LOCK_READ)
		return (true);
	if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
		return (true);
	return (false);
}

static __always_inline bool
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
    LOCK_FILE_LINE_ARG_DEF)
{

	/*
	 * Handle the easy case.  If no other thread has a write
	 * lock, then try to bump up the count of read locks.  Note
	 * that we have to preserve the current state of the
	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
	 * read lock, then rw_lock must have changed, so restart
	 * the loop.  Note that this handles the case of a
	 * completely unlocked rwlock since such a lock is encoded
	 * as a read lock with no waiters.
	 */
	while (__rw_can_read(td, *vp, fp)) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
		    *vp + RW_ONE_READER)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR4(KTR_LOCK,
				    "%s: %p succeed %p -> %p", __func__,
				    rw, (void *)*vp,
				    (void *)(*vp + RW_ONE_READER));
			td->td_rw_rlocks++;
			return (true);
		}
	}
	return (false);
}
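/*
 * Editor's note (illustrative): because __rw_can_read() also succeeds when
 * td_rw_rlocks is already non-zero, a thread that holds a read lock may
 * take it again even while writers are queued, avoiding self-deadlock.
 * "example_lock" is hypothetical.
 */
#if 0
	rw_rlock(&example_lock);
	/* ... */
	rw_rlock(&example_lock);	/* read recursion is permitted */
	rw_runlock(&example_lock);
	rw_runlock(&example_lock);
#endif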
static void __noinline
__rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state = 0;
	int doing_lockprof = 0;
#endif

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			goto out_lockstat;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif
#ifdef LOCK_PROFILING
	doing_lockprof = 1;
	state = v;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				do {
					lock_delay(&lda);
					v = RW_READ_VALUE(rw);
					owner = lv_rw_wowner(v);
				} while (owner != NULL && TD_IS_RUNNING(owner));
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else {
			if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
				MPASS(!__rw_can_read(td, v, false));
				lock_delay_spin(2);
				v = RW_READ_VALUE(rw);
				continue;
			}
			if (spintries < rowner_retries) {
				spintries++;
				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
				    "spinning", "lockname:\"%s\"",
				    rw->lock_object.lo_name);
				n = RW_READERS(v);
				for (i = 0; i < rowner_loops; i += n) {
					lock_delay_spin(n);
					v = RW_READ_VALUE(rw);
					if (!(v & RW_LOCK_READ))
						break;
					n = RW_READERS(v);
					if (n == 0)
						break;
					if (__rw_can_read(td, v, false))
						break;
				}
#ifdef KDTRACE_HOOKS
				lda.spin_cnt += rowner_loops - i;
#endif
				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
				    "running");
				if (i < rowner_loops)
					continue;
			}
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present;
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = RW_READ_VALUE(rw);
retry_ts:
		if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
		    __rw_can_read(td, v, false)) {
			turnstile_cancel(ts);
			continue;
		}

		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!__rw_can_read(td, v, false));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_READ_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_READER);
}

void
__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	td = curthread;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(td),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    td, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != td,
	    ("rw_rlock: wlock already held for %s @ %s:%d",
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	v = RW_READ_VALUE(rw);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
	    !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0,
		    file, line);

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	TD_LOCKS_INC(curthread);
}

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
}

int
__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	x = rw->rw_lock;
	for (;;) {
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file,
			    line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
}
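/*
 * Editor's sketch (illustrative): rw_try_rlock()/rw_try_wlock() return
 * non-zero on success and never sleep, which suits callers that must not
 * block.  "example_lock" and example_work() are hypothetical.
 */
#if 0
	if (rw_try_rlock(&example_lock)) {
		example_work();
		rw_runlock(&example_lock);
	} else {
		/* Lock unavailable without sleeping; take a fallback path. */
	}
#endif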
static __always_inline bool
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
{

	for (;;) {
		if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    *vp - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)*vp,
					    (void *)(*vp - RW_ONE_READER));
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
__rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t setv, passedv, queue;

	if (SCHEDULER_STOPPED())
		return;

	passedv = v;
	if (__rw_runlock_try(rw, td, &v))
		goto out_lockstat;

	/*
	 * Ok, we know we have waiters and we think we are the
	 * last reader, so grab the turnstile lock.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (__rw_runlock_try(rw, td, &v))
			break;

		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wakeup actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wakeup all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		setv = RW_UNLOCKED;
		queue = TS_SHARED_QUEUE;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			setv |= (v & RW_LOCK_READ_WAITERS);
		}
		setv |= (v & RW_LOCK_WRITE_SPINNER);
		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
			continue;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		if (__predict_false(ts == NULL)) {
			panic("got NULL turnstile on rwlock %p passedv %p v %p",
			    rw, (void *)passedv, (void *)v);
		}
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts);
		td->td_rw_rlocks--;
		break;
	}
	turnstile_chain_unlock(&rw->lock_object);
out_lockstat:
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
}

void
_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	td = curthread;
	v = RW_READ_VALUE(rw);

	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
	    !__rw_runlock_try(rw, td, &v)))
		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_release_lock(&rw->lock_object, false);

	TD_LOCKS_DEC(curthread);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
}

#ifdef ADAPTIVE_RWLOCKS
static inline void
rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
{

	if (v & RW_LOCK_WRITE_SPINNER)
		return;
	if (*in_critical) {
		critical_exit();
		*in_critical = false;
		(*extra_work)--;
	}
}
#else
#define	rw_drop_critical(v, in_critical, extra_work) do { } while (0)
#endif

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t tid;
	struct rwlock *rw;
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
	enum { READERS, WRITER } sleep_reason = READERS;
	bool in_critical = false;
#endif
	uintptr_t setv;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state = 0;
	int doing_lockprof = 0;
#endif
	int extra_work = 0;

	tid = (uintptr_t)curthread;
	rw = rwlock2rw(c);

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		while (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				goto out_lockstat;
		}
		extra_work = 1;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	doing_lockprof = 1;
	state = v;
#endif

	if (SCHEDULER_STOPPED())
		return;

	if (__predict_false(v == RW_UNLOCKED))
		v = RW_READ_VALUE(rw);

	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				break;
			continue;
		}
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
				break;
			continue;
		}

		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if (!(v & RW_LOCK_READ)) {
			rw_drop_critical(v, &in_critical, &extra_work);
			sleep_reason = WRITER;
			owner = lv_rw_wowner(v);
			if (!TD_IS_RUNNING(owner))
				goto ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			do {
				lock_delay(&lda);
				v = RW_READ_VALUE(rw);
				owner = lv_rw_wowner(v);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (RW_READERS(v) > 0) {
			sleep_reason = READERS;
			if (spintries == rowner_retries)
				goto ts;
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!in_critical) {
					critical_enter();
					in_critical = true;
					extra_work++;
				}
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					critical_exit();
					in_critical = false;
					extra_work--;
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			n = RW_READERS(v);
			for (i = 0; i < rowner_loops; i += n) {
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if (!(v & RW_LOCK_WRITE_SPINNER))
					break;
				if (!(v & RW_LOCK_READ))
					break;
				n = RW_READERS(v);
				if (n == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < rowner_loops)
				continue;
		}
ts:
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				rw_drop_critical(v, &in_critical, &extra_work);
				continue;
			}
		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
			turnstile_cancel(ts);
			rw_drop_critical(v, &in_critical, &extra_work);
			continue;
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without leaving any waiters queued, simply try
		 * to acquire it.  If waiters are queued, claim the lock
		 * ownership while preserving the pending queue.
		 */
		setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~setv) == RW_UNLOCKED) {
			setv &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
				if (setv)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			goto retry_ts;
		}

#ifdef ADAPTIVE_RWLOCKS
		if (in_critical) {
			if ((v & RW_LOCK_WRITE_SPINNER) ||
			    !((v & RW_LOCK_WRITE_WAITERS))) {
				setv = v & ~RW_LOCK_WRITE_SPINNER;
				setv |= RW_LOCK_WRITE_WAITERS;
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
					goto retry_ts;
			}
			critical_exit();
			in_critical = false;
			extra_work--;
		} else {
#endif
			/*
			 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
			 * set it.  If we fail to set it, then loop back and try
			 * again.
			 */
			if (!(v & RW_LOCK_WRITE_WAITERS)) {
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_WAITERS))
					goto retry_ts;
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p set write waiters flag",
					    __func__, rw);
			}
#ifdef ADAPTIVE_RWLOCKS
		}
#endif
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
	if (__predict_true(!extra_work))
		return;
#ifdef ADAPTIVE_RWLOCKS
	if (in_critical)
		critical_exit();
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
}

/*
 * This function is called if lockstat is active or the first try at releasing
 * a write lock failed.  The latter means that the lock is recursed or one of
 * the 2 waiter bits must be set indicating that at least one thread is waiting
 * on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, setv, passedv;
	int queue;

	tid = (uintptr_t)curthread;
	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);
	if (__predict_false(v == tid))
		v = RW_READ_VALUE(rw);

	if (v & RW_LOCK_WRITER_RECURSED) {
		if (--(rw->rw_recurse) == 0)
			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
	if (v == tid && _rw_write_unlock(rw, tid))
		return;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	setv = RW_UNLOCKED;
	passedv = v;
	v = RW_READ_VALUE(rw);
	queue = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	atomic_store_rel_ptr(&rw->rw_lock, setv);

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	ts = turnstile_lookup(&rw->lock_object);
	if (__predict_false(ts == NULL)) {
		panic("got NULL turnstile on rwlock %p passedv %p v %p", rw,
		    (void *)passedv, (void *)v);
	}
	turnstile_broadcast(ts, queue);
	turnstile_unpend(ts);
	turnstile_chain_unlock(&rw->lock_object);
}
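/*
 * Usage sketch for the upgrade/downgrade KPIs implemented below (editor's
 * addition, illustrative only; "example_lock" and the surrounding logic
 * are hypothetical).  A reader that discovers it must modify the protected
 * data can attempt a non-blocking upgrade and fall back to re-acquiring
 * the lock as a writer:
 */
#if 0
	rw_rlock(&example_lock);
	if (must_modify) {
		if (!rw_try_upgrade(&example_lock)) {
			/*
			 * Upgrade failed: drop the read lock, take the write
			 * lock, and revalidate anything read beforehand.
			 */
			rw_runlock(&example_lock);
			rw_wlock(&example_lock);
		}
		/* ... modify the data with the write lock held ... */
		rw_downgrade(&example_lock);	/* back to a read lock */
	}
	rw_runlock(&example_lock);
#endif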
/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t v, setv, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		setv = tid | (v & RW_LOCK_WAITERS);
		success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
		if (success) {
			if (v & RW_LOCK_WAITERS)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		goto retry_ts;
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(rw__upgrade, rw);
	}
	return (success);
}

int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(rw__downgrade, rw);
}

void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * whether we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
static void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}
#endif
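/*
 * Editor's note (illustrative): db_show_rwlock() is reached through the
 * lock class hook (lc_ddb_show), so rwlock state is normally inspected
 * from the in-kernel debugger with something like
 *
 *	db> show lock <address of struct rwlock>
 *
 * The exact command spelling is an assumption; see ddb(4).
 */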