/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif

struct lock_class lock_class_rw = {
	"rw",
	LC_SLEEPLOCK | LC_RECURSABLE /* | LC_UPGRADABLE */,
#ifdef DDB
	db_show_rwlock
#endif
};

#define	rw_owner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
rw_init(struct rwlock *rw, const char *name)
{

	rw->rw_lock = RW_UNLOCKED;

	lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
	    LO_RECURSABLE /* | LO_UPGRADABLE */);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	lock_destroy(&rw->rw_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw_owner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	_rw_assert(rw, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
	__rw_wunlock(rw, curthread, file, line);
}

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	KASSERT(rw_owner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);

	/*
	 * Note that we don't make any attempt to try to block read
	 * locks once a writer has blocked on the lock.  The reason is
	 * that we currently allow for read locks to recurse and we
	 * don't keep track of all the holders of read locks.  Thus, if
	 * we were to block readers once a writer blocked and a reader
	 * tried to recurse on their reader lock after a writer had
	 * blocked, we would end up in a deadlock since the reader would
	 * be blocked on the writer, and the writer would be blocked
	 * waiting for the reader to release its original read lock.
	 */
	for (;;) {
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {

			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if another thread currently holds a write lock,
			 * and in that case RW_LOCK_READ should be clear.
			 */
			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
			    x + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)x,
					    (void *)(x + RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock, so acquire the turnstile lock so we can
		 * begin the process of blocking.
		 */
		turnstile_lock(&rw->rw_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if there is no
		 * longer a write lock.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {
			turnstile_release(&rw->rw_object);
			continue;
		}

		/*
		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
		 * flag is already set, then we can go ahead and block.  If
		 * it is not set, then try to set it.  If we fail to set it,
		 * drop the turnstile lock and restart the loop.
		 */
		if (!(x & RW_LOCK_READ_WAITERS) &&
		    !atomic_cmpset_ptr(&rw->rw_lock, x,
		    x | RW_LOCK_READ_WAITERS)) {
			turnstile_release(&rw->rw_object);
			continue;
		}
		if (!(x & RW_LOCK_READ_WAITERS) &&
		    LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__,
			    rw);

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */

	LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, 0, file, line);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x;

	_rw_assert(rw, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * We should never have read waiters while at least one
		 * thread holds a read lock.  (See note above.)
		 */
		KASSERT(!(x & RW_LOCK_READ_WAITERS),
		    ("%s: waiting readers", __func__));

		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WRITE_WAITERS)) {

			/*
			 * There shouldn't be any flags set and we should
			 * be the only read lock.  If we fail to release
			 * the single read lock, then another thread might
			 * have just acquired a read lock, so go back up
			 * to the multiple read locks case.
			 */
			MPASS(x == RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}

		/*
		 * There should just be one reader with one or more
		 * writers waiting.
		 */
		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));

		/*
		 * Ok, we know we have a waiting writer and we think we
		 * are the last reader, so grab the turnstile lock.
		 */
		turnstile_lock(&rw->rw_object);

		/*
		 * Try to drop our lock, leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff, you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		if (!atomic_cmpset_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
			turnstile_release(&rw->rw_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->rw_object);
		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		break;
	}
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	uintptr_t v;

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
		turnstile_lock(&rw->rw_object);
		v = rw->rw_lock;

		/*
		 * If the lock was released while spinning on the
		 * turnstile chain lock, try again.
		 */
		if (v == RW_UNLOCKED) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the lock was released by a writer with both readers
		 * and writers waiting and a reader hasn't woken up and
		 * acquired the lock yet, rw_lock will be set to the
		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
		 * that value, try to acquire it once.  Note that we have
		 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
		 * other writers still waiting.  If we fail, restart the
		 * loop.
		 */
		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
			    tid | RW_LOCK_WRITE_WAITERS)) {
				turnstile_claim(&rw->rw_object);
				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
				    __func__, rw);
				break;
			}
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS) &&
		    !atomic_cmpset_ptr(&rw->rw_lock, v,
		    v | RW_LOCK_WRITE_WAITERS)) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}
		if (!(v & RW_LOCK_WRITE_WAITERS) &&
		    LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p set write waiters flag",
			    __func__, rw);

		/* XXX: Adaptively spin if current wlock owner on another CPU? */

		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw),
		    TS_EXCLUSIVE_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the two waiter bits must be set, indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_lock(&rw->rw_object);
	ts = turnstile_lookup(&rw->rw_object);

	/* XXX: Adaptive fixup would be required here. */
	MPASS(ts != NULL);

	/*
	 * Use the same algorithm as sx locks for now: prefer waking up
	 * shared waiters over writers if we have any.  This is probably
	 * not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting, we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader, it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
		queue = TS_SHARED_QUEUE;
		v = RW_UNLOCKED | (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
	} else {
		queue = TS_EXCLUSIVE_QUEUE;
		v = RW_UNLOCKED;
	}
	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	/* Wake up all waiters for the specific queue. */
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef	_rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_owner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->rw_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);
#endif
		break;
	case RA_WLOCKED:
		if (rw_owner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_owner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %jd locks\n",
		    (intmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_owner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif