/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
static int	trylock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_trylock = trylock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

#ifdef ADAPTIVE_RWLOCKS
#ifdef RWLOCK_CUSTOM_BACKOFF
static u_short __read_frequently rowner_retries;
static u_short __read_frequently rowner_loops;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "rwlock debugging");
SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");

static struct lock_delay_config __read_frequently rw_delay;

SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
    0, "");
SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
    0, "");

static void
rw_lock_delay_init(void *arg __unused)
{

	lock_delay_default_init(&rw_delay);
	rowner_retries = 10;
	rowner_loops = max(10000, rw_delay.max);
}
LOCK_DELAY_SYSINIT(rw_lock_delay_init);
#else
#define	rw_delay	locks_delay
#define	rowner_retries	locks_delay_retries
#define	rowner_loops	locks_delay_loops
#endif
#endif

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */

#define	lv_rw_wowner(v)						\
	((v) & RW_LOCK_READ ? NULL :				\
	 (struct thread *)RW_OWNER((v)))

#define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))

/*
 * Returns true if a write owner is recursed.  Write ownership is not assured
 * here and should be previously checked.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

static void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

static void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

static int
trylock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		return (rw_try_rlock(rw));
	else
		return (rw_try_wlock(rw));
}

static uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
static int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE | RW_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	if (opts & RW_NEW)
		flags |= LO_NEW;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args;

	args = arg;
	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t tid, v;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %p @ %s:%d",
	    curthread, rw, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock %p @ %s:%d", rw, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	v = RW_UNLOCKED;
	if (!_rw_write_lock_fetch(rw, &v, tid))
		_rw_wlock_hard(rw, v, file, line);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
		    0, 0, file, line, LOCKSTAT_WRITER);

	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}

int
__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t tid, v;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("rw_try_wlock() by idle thread %p on rwlock %p @ %s:%d",
	    curthread, rw, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock %p @ %s:%d", rw, file, line));

	rval = 1;
	recursed = false;
	v = RW_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
			break;
		if (v == RW_UNLOCKED)
			continue;
		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
			rw->rw_recurse++;
			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}
	return (rval);
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock %p @ %s:%d", rw, file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);

#ifdef LOCK_PROFILING
	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
#else
	__rw_wunlock(rw, curthread, file, line);
#endif

	TD_LOCKS_DEC(curthread);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Otherwise it fails,
 * which gives priority to writers over readers.
 */
static __always_inline bool
__rw_can_read(struct thread *td, uintptr_t v, bool fp)
{

	if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
	    == RW_LOCK_READ)
		return (true);
	if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
		return (true);
	return (false);
}

static __always_inline bool
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
    LOCK_FILE_LINE_ARG_DEF)
{

	/*
	 * Handle the easy case.  If no other thread has a write
	 * lock, then try to bump up the count of read locks.  Note
	 * that we have to preserve the current state of the
	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
	 * read lock, then rw_lock must have changed, so restart
	 * the loop.
	 * Note that this handles the case of a completely unlocked rwlock
	 * since such a lock is encoded as a read lock with no waiters.
	 */
	while (__rw_can_read(td, *vp, fp)) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
		    *vp + RW_ONE_READER)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR4(KTR_LOCK,
				    "%s: %p succeed %p -> %p", __func__,
				    rw, (void *)*vp,
				    (void *)(*vp + RW_ONE_READER));
			td->td_rw_rlocks++;
			return (true);
		}
	}
	return (false);
}

static void __noinline
__rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
	uintptr_t state = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	int doing_lockprof = 0;
#endif

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			goto out_lockstat;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
	}
	state = v;
#endif
#ifdef LOCK_PROFILING
	doing_lockprof = 1;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				do {
					lock_delay(&lda);
					v = RW_READ_VALUE(rw);
					owner = lv_rw_wowner(v);
				} while (owner != NULL && TD_IS_RUNNING(owner));
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else {
			if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
				MPASS(!__rw_can_read(td, v, false));
				lock_delay_spin(2);
				v = RW_READ_VALUE(rw);
				continue;
			}
			if (spintries < rowner_retries) {
				spintries++;
				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
				    "spinning", "lockname:\"%s\"",
				    rw->lock_object.lo_name);
				n = RW_READERS(v);
				for (i = 0; i < rowner_loops; i += n) {
					lock_delay_spin(n);
					v = RW_READ_VALUE(rw);
					if (!(v & RW_LOCK_READ))
						break;
					n = RW_READERS(v);
					if (n == 0)
						break;
					if (__rw_can_read(td, v, false))
						break;
				}
#ifdef KDTRACE_HOOKS
				lda.spin_cnt += rowner_loops - i;
#endif
				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
				    "running");
				if (i < rowner_loops)
					continue;
			}
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present;
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = RW_READ_VALUE(rw);
retry_ts:
		if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
		    __rw_can_read(td, v, false)) {
			turnstile_cancel(ts);
			continue;
		}

		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!__rw_can_read(td, v, false));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_READ_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_READER);
}

void
__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	td = curthread;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(td),
	    ("rw_rlock() by idle thread %p on rwlock %p @ %s:%d",
	    td, rw, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock %p @ %s:%d", rw, file, line));
	KASSERT(rw_wowner(rw) != td,
	    ("rw_rlock: wlock already held for %p @ %s:%d",
	    rw, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	v = RW_READ_VALUE(rw);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
	    !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0,
		    file, line);

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	TD_LOCKS_INC(curthread);
}

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
}

int
__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %p @ %s:%d",
	    curthread, rw, file, line));

	x = rw->rw_lock;
	for (;;) {
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock %p @ %s:%d", rw, file,
		    line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
}

static __always_inline bool
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
{

	for (;;) {
		if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    *vp - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)*vp,
					    (void *)(*vp - RW_ONE_READER));
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
__rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t setv, queue;

	if (SCHEDULER_STOPPED())
		return;

	if (__rw_runlock_try(rw, td, &v))
		goto out_lockstat;

	/*
	 * Ok, we know we have waiters and we think we are the
	 * last reader, so grab the turnstile lock.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (__rw_runlock_try(rw, td, &v))
			break;

		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wakeup actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wakeup all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		setv = RW_UNLOCKED;
		queue = TS_SHARED_QUEUE;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			setv |= (v & RW_LOCK_READ_WAITERS);
		}
		setv |= (v & RW_LOCK_WRITE_SPINNER);
		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
			continue;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts);
		td->td_rw_rlocks--;
		break;
	}
	turnstile_chain_unlock(&rw->lock_object);
out_lockstat:
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
}

void
_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock %p @ %s:%d", rw, file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	td = curthread;
	v = RW_READ_VALUE(rw);

	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
	    !__rw_runlock_try(rw, td, &v)))
		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_release_lock(&rw->lock_object, false);

	TD_LOCKS_DEC(curthread);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
}

#ifdef ADAPTIVE_RWLOCKS
static inline void
rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
{

	if (v & RW_LOCK_WRITE_SPINNER)
		return;
	if (*in_critical) {
		critical_exit();
		*in_critical = false;
		(*extra_work)--;
	}
}
#else
#define	rw_drop_critical(v, in_critical, extra_work) do { } while (0)
#endif

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t tid;
	struct rwlock *rw;
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
	enum { READERS, WRITER } sleep_reason = READERS;
	bool in_critical = false;
#endif
	uintptr_t setv;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
	uintptr_t state = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	int doing_lockprof = 0;
#endif
	int extra_work = 0;

	tid = (uintptr_t)curthread;
	rw = rwlock2rw(c);

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		while (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				goto out_lockstat;
		}
		extra_work = 1;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
	}
	state = v;
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	doing_lockprof = 1;
#endif

	if (SCHEDULER_STOPPED())
		return;

	if (__predict_false(v == RW_UNLOCKED))
		v = RW_READ_VALUE(rw);

	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %p @ %s:%d\n",
		    __func__, rw, file, line));
		rw->rw_recurse++;
		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init_noadapt(&lda);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object, false,
	    &contested, &waittime);

	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);

	for (;;) {
		if (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				break;
			continue;
		}
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
				break;
			continue;
		}

		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if (!(v & RW_LOCK_READ)) {
			rw_drop_critical(v, &in_critical, &extra_work);
			sleep_reason = WRITER;
			owner = lv_rw_wowner(v);
			if (!TD_IS_RUNNING(owner))
				goto ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			do {
				lock_delay(&lda);
				v = RW_READ_VALUE(rw);
				owner = lv_rw_wowner(v);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (RW_READERS(v) > 0) {
			sleep_reason = READERS;
			if (spintries == rowner_retries)
				goto ts;
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!in_critical) {
					critical_enter();
					in_critical = true;
					extra_work++;
				}
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					critical_exit();
					in_critical = false;
					extra_work--;
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			n = RW_READERS(v);
			for (i = 0; i < rowner_loops; i += n) {
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if (!(v & RW_LOCK_WRITE_SPINNER))
					break;
				if (!(v & RW_LOCK_READ))
					break;
				n = RW_READERS(v);
				if (n == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < rowner_loops)
				continue;
		}
ts:
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				rw_drop_critical(v, &in_critical, &extra_work);
				continue;
			}
		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
			turnstile_cancel(ts);
			rw_drop_critical(v, &in_critical, &extra_work);
			continue;
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without leaving any waiters pending, simply try
		 * to acquire it.  If a queue of pending waiters is present,
		 * claim lock ownership and preserve the pending queue.
		 */
		setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~setv) == RW_UNLOCKED) {
			setv &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
				if (setv)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			goto retry_ts;
		}

#ifdef ADAPTIVE_RWLOCKS
		if (in_critical) {
			if ((v & RW_LOCK_WRITE_SPINNER) ||
			    !((v & RW_LOCK_WRITE_WAITERS))) {
				setv = v & ~RW_LOCK_WRITE_SPINNER;
				setv |= RW_LOCK_WRITE_WAITERS;
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
					goto retry_ts;
			}
			critical_exit();
			in_critical = false;
			extra_work--;
		} else {
#endif
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_WRITE_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
#ifdef ADAPTIVE_RWLOCKS
		}
#endif
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
		v = RW_READ_VALUE(rw);
	}
	THREAD_CONTENTION_DONE(&rw->lock_object);
	if (__predict_true(!extra_work))
		return;
#ifdef ADAPTIVE_RWLOCKS
	if (in_critical)
		critical_exit();
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
}

/*
 * This function is called if lockstat is active or the first try at releasing
 * a write lock failed.  The latter means that the lock is recursed or one of
 * the 2 waiter bits must be set indicating that at least one thread is waiting
 * on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, setv;
	int queue;

	tid = (uintptr_t)curthread;
	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);
	if (__predict_false(v == tid))
		v = RW_READ_VALUE(rw);

	if (v & RW_LOCK_WRITER_RECURSED) {
		if (--(rw->rw_recurse) == 0)
			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
	if (v == tid && _rw_write_unlock(rw, tid))
		return;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	setv = RW_UNLOCKED;
	v = RW_READ_VALUE(rw);
	queue = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	atomic_store_rel_ptr(&rw->rw_lock, setv);

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	turnstile_broadcast(ts, queue);
	turnstile_unpend(ts);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t v, setv, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock %p @ %s:%d", rw, file,
	    line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		setv = tid | (v & RW_LOCK_WAITERS);
		success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
		if (success) {
			if (v & RW_LOCK_WAITERS)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		goto retry_ts;
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(rw__upgrade, rw);
	}
	return (success);
}

int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock %p @ %s:%d", rw, file, line));
	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(rw__downgrade, rw);
}

void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
static void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}
#endif
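
/*
 * Illustrative sketch only, guarded out of the build with #if 0: the
 * canonical rwlock(9) consumer pattern that the primitives above implement.
 * The 'example_softc' structure and its fields are hypothetical; only the
 * documented rw_init()/rw_rlock()/rw_runlock()/rw_wlock()/rw_wunlock()/
 * rw_destroy() interfaces are assumed.
 */
#if 0
struct example_softc {
	struct rwlock	es_lock;	/* protects es_count */
	int		es_count;
};

static void
example_attach(struct example_softc *sc)
{

	rw_init(&sc->es_lock, "example softc lock");
}

static int
example_get_count(struct example_softc *sc)
{
	int count;

	/* Shared acquisition: many readers may hold the lock concurrently. */
	rw_rlock(&sc->es_lock);
	count = sc->es_count;
	rw_runlock(&sc->es_lock);
	return (count);
}

static void
example_bump_count(struct example_softc *sc)
{

	/* Exclusive acquisition: a single writer excludes all readers. */
	rw_wlock(&sc->es_lock);
	sc->es_count++;
	rw_wunlock(&sc->es_lock);
}

static void
example_detach(struct example_softc *sc)
{

	rw_destroy(&sc->es_lock);
}
#endif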