/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or NULL
 * if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Return whether a write owner is recursed.  Write ownership is not assured
 * here and should be checked by the caller beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
rw_init_flags(struct rwlock *rw, const char *name, int opts)
{
	int flags;

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
}

int
rw_wowned(const struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
_rw_try_wlock(struct rwlock *rw, const char *file, int line)
{
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) &&
	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
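		 * The reader count lives in the bits of rw_lock above the
		 * flag bits (see RW_ONE_READER in sys/rwlock.h), so the
		 * CAS below bumps the count by adding RW_ONE_READER while
		 * leaving the waiter flags captured in 'v' untouched.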
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
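		 * turnstile_wait() releases the turnstile chain lock and
		 * puts us to sleep on the shared queue; once we are woken
		 * we loop back and retry the acquisition from the top.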
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
_rw_try_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x, v, queue;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
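		 * Holding the turnstile chain lock keeps the waiter flags
		 * stable while we pick which queue to wake and release the
		 * final read count below.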
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
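		 * Spinning avoids a context switch when the hold time is
		 * expected to be short; if the owner is not running we
		 * fall through and block on the turnstile instead.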
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiter flags for this rwlock.  If the lock was
		 * released without leaving any waiters queued, simply try
		 * to acquire it.  If a queue of pending waiters is present,
		 * claim lock ownership and preserve the queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
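	 * spin_cnt counts every pass through the acquisition loop above,
	 * including passes that ended in turnstile_wait(), so subtracting
	 * sleep_cnt approximates the iterations spent purely spinning.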
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (SCHEDULER_STOPPED())
		return;

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up write
	 * waiters if we have any over readers, matching the writer
	 * preference in RW_CAN_READ().  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * writers but leave the RW_LOCK_READ_WAITERS flag set.  There is
	 * probably a potential priority inversion in there that could be
	 * worked around either by waking both queues of waiters or doing
	 * some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
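	 *
	 * For reference, a typical caller of rw_try_upgrade() looks
	 * roughly like the following (an illustrative sketch with a
	 * hypothetical lock 'lk', not code from this file):
	 *
	 *	rw_rlock(&lk);
	 *	...
	 *	if (!rw_try_upgrade(&lk)) {
	 *		rw_runlock(&lk);
	 *		rw_wlock(&lk);
	 *		... revalidate state; it may have changed ...
	 *	}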
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
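	 * If writers are pending we leave every waiter queued and just
	 * give up turnstile ownership with turnstile_disown(); after the
	 * downgrade the lock is read-owned, so there is no single owner
	 * thread left to lend priority to.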
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(const struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
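		 * The read count in rw_lock does not record which threads
		 * hold read locks, so without WITNESS the best we can do
		 * for RA_UNLOCKED is to verify that we are not the write
		 * owner.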
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif