/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or NULL
 * if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Returns non-zero if a write owner is recursed.  Write ownership is not
 * assured here and should be checked by the caller beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
rw_init_flags(struct rwlock *rw, const char *name, int opts)
{
	int flags;

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
}

int
rw_wowned(const struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	KASSERT(!TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file,
	    line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
_rw_try_wlock(struct rwlock *rw, const char *file, int line)
{
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(!TD_IS_IDLETHREAD(curthread),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) &&
	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(!TD_IS_IDLETHREAD(curthread),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.
		 * If we fail to acquire a read lock, then rw_lock must have
		 * changed, so restart the loop.  Note that this handles the
		 * case of a completely unlocked rwlock since such a lock is
		 * encoded as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
_rw_try_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(!TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x, v, queue;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without any pending waiters queue being
		 * maintained, simply try to acquire it.  If a pending
		 * waiters queue is present, claim the lock ownership and
		 * maintain the pending queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (SCHEDULER_STOPPED())
		return;

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.
	 * There is probably a potential priority inversion in there that
	 * could be worked around either by waking both queues of waiters
	 * or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(const struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif
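/*
 * Illustrative sketch (compiled out): typical consumer-side use of the
 * rw_*() KPI implemented above, as declared in <sys/rwlock.h>.  The
 * "example_softc" structure and the functions below are hypothetical and
 * exist only to show the read path, the try-upgrade fallback, and the
 * downgrade back to a shared hold; they are not part of this file's
 * implementation.
 */
#if 0
struct example_softc {
	struct rwlock	sc_lock;	/* protects sc_count */
	int		sc_count;
};

static void
example_init(struct example_softc *sc)
{

	rw_init(&sc->sc_lock, "example softc lock");
	sc->sc_count = 0;
}

static int
example_read(struct example_softc *sc)
{
	int count;

	/* Multiple readers may hold the lock concurrently. */
	rw_rlock(&sc->sc_lock);
	count = sc->sc_count;
	rw_runlock(&sc->sc_lock);
	return (count);
}

static void
example_bump(struct example_softc *sc)
{

	rw_rlock(&sc->sc_lock);
	if (!rw_try_upgrade(&sc->sc_lock)) {
		/* Upgrade failed; drop the read lock and relock for write. */
		rw_runlock(&sc->sc_lock);
		rw_wlock(&sc->sc_lock);
	}
	rw_assert(&sc->sc_lock, RA_WLOCKED);
	sc->sc_count++;
	/* Let concurrent readers back in, then drop our shared hold. */
	rw_downgrade(&sc->sc_lock);
	rw_runlock(&sc->sc_lock);
}

static void
example_destroy(struct example_softc *sc)
{

	rw_destroy(&sc->sc_lock);
}
#endif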