1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2016 by Delphix. All rights reserved. 25 * Copyright 2024 Oxide Computer Company 26 */ 27 28 #include "lint.h" 29 #include "thr_uberdata.h" 30 #include <sys/sdt.h> 31 32 #define TRY_FLAG 0x10 33 #define READ_LOCK 0 34 #define WRITE_LOCK 1 35 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 36 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 37 38 #define NLOCKS 4 /* initial number of readlock_t structs allocated */ 39 40 #define ASSERT_CONSISTENT_STATE(readers) \ 41 ASSERT(!((readers) & URW_WRITE_LOCKED) || \ 42 ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED) 43 44 /* 45 * Find/allocate an entry for rwlp in our array of rwlocks held for reading. 46 * We must be deferring signals for this to be safe. 47 * Else if we are returning an entry with ul_rdlockcnt == 0, 48 * it could be reassigned behind our back in a signal handler. 
49 */ 50 static readlock_t * 51 rwl_entry(rwlock_t *rwlp) 52 { 53 ulwp_t *self = curthread; 54 readlock_t *remembered = NULL; 55 readlock_t *readlockp; 56 uint_t nlocks; 57 58 /* we must be deferring signals */ 59 ASSERT((self->ul_critical + self->ul_sigdefer) != 0); 60 61 if ((nlocks = self->ul_rdlockcnt) != 0) 62 readlockp = self->ul_readlock.array; 63 else { 64 nlocks = 1; 65 readlockp = &self->ul_readlock.single; 66 } 67 68 for (; nlocks; nlocks--, readlockp++) { 69 if (readlockp->rd_rwlock == rwlp) 70 return (readlockp); 71 if (readlockp->rd_count == 0 && remembered == NULL) 72 remembered = readlockp; 73 } 74 if (remembered != NULL) { 75 remembered->rd_rwlock = rwlp; 76 return (remembered); 77 } 78 79 /* 80 * No entry available. Allocate more space, converting the single 81 * readlock_t entry into an array of readlock_t entries if necessary. 82 */ 83 if ((nlocks = self->ul_rdlockcnt) == 0) { 84 /* 85 * Initial allocation of the readlock_t array. 86 * Convert the single entry into an array. 87 */ 88 self->ul_rdlockcnt = nlocks = NLOCKS; 89 readlockp = lmalloc(nlocks * sizeof (readlock_t)); 90 /* 91 * The single readlock_t becomes the first entry in the array. 92 */ 93 *readlockp = self->ul_readlock.single; 94 self->ul_readlock.single.rd_count = 0; 95 self->ul_readlock.array = readlockp; 96 /* 97 * Return the next available entry in the array. 98 */ 99 (++readlockp)->rd_rwlock = rwlp; 100 return (readlockp); 101 } 102 /* 103 * Reallocate the array, double the size each time. 104 */ 105 readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t)); 106 (void) memcpy(readlockp, self->ul_readlock.array, 107 nlocks * sizeof (readlock_t)); 108 lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t)); 109 self->ul_readlock.array = readlockp; 110 self->ul_rdlockcnt *= 2; 111 /* 112 * Return the next available entry in the newly allocated array. 
113 */ 114 (readlockp += nlocks)->rd_rwlock = rwlp; 115 return (readlockp); 116 } 117 118 /* 119 * Free the array of rwlocks held for reading. 120 */ 121 void 122 rwl_free(ulwp_t *ulwp) 123 { 124 uint_t nlocks; 125 126 if ((nlocks = ulwp->ul_rdlockcnt) != 0) 127 lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t)); 128 ulwp->ul_rdlockcnt = 0; 129 ulwp->ul_readlock.single.rd_rwlock = NULL; 130 ulwp->ul_readlock.single.rd_count = 0; 131 } 132 133 /* 134 * Check if a reader version of the lock is held by the current thread. 135 */ 136 #pragma weak _rw_read_held = rw_read_held 137 int 138 rw_read_held(rwlock_t *rwlp) 139 { 140 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 141 uint32_t readers; 142 ulwp_t *self = curthread; 143 readlock_t *readlockp; 144 uint_t nlocks; 145 int rval = 0; 146 147 no_preempt(self); 148 149 readers = *rwstate; 150 ASSERT_CONSISTENT_STATE(readers); 151 if (!(readers & URW_WRITE_LOCKED) && 152 (readers & URW_READERS_MASK) != 0) { 153 /* 154 * The lock is held for reading by some thread. 155 * Search our array of rwlocks held for reading for a match. 156 */ 157 if ((nlocks = self->ul_rdlockcnt) != 0) 158 readlockp = self->ul_readlock.array; 159 else { 160 nlocks = 1; 161 readlockp = &self->ul_readlock.single; 162 } 163 for (; nlocks; nlocks--, readlockp++) { 164 if (readlockp->rd_rwlock == rwlp) { 165 if (readlockp->rd_count) 166 rval = 1; 167 break; 168 } 169 } 170 } 171 172 preempt(self); 173 return (rval); 174 } 175 176 /* 177 * Check if a writer version of the lock is held by the current thread. 
178 */ 179 #pragma weak _rw_write_held = rw_write_held 180 int 181 rw_write_held(rwlock_t *rwlp) 182 { 183 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 184 uint32_t readers; 185 ulwp_t *self = curthread; 186 int rval; 187 188 no_preempt(self); 189 190 readers = *rwstate; 191 ASSERT_CONSISTENT_STATE(readers); 192 rval = ((readers & URW_WRITE_LOCKED) && 193 rwlp->rwlock_owner == (uintptr_t)self && 194 (rwlp->rwlock_type == USYNC_THREAD || 195 rwlp->rwlock_ownerpid == self->ul_uberdata->pid)); 196 197 preempt(self); 198 return (rval); 199 } 200 201 #pragma weak _rwlock_init = rwlock_init 202 int 203 rwlock_init(rwlock_t *rwlp, int type, void *arg __unused) 204 { 205 ulwp_t *self = curthread; 206 207 if (type != USYNC_THREAD && type != USYNC_PROCESS) 208 return (EINVAL); 209 /* 210 * Once reinitialized, we can no longer be holding a read or write lock. 211 * We can do nothing about other threads that are holding read locks. 212 */ 213 sigoff(self); 214 rwl_entry(rwlp)->rd_count = 0; 215 sigon(self); 216 (void) memset(rwlp, 0, sizeof (*rwlp)); 217 rwlp->rwlock_type = (uint16_t)type; 218 rwlp->rwlock_magic = RWL_MAGIC; 219 rwlp->mutex.mutex_type = (uint8_t)type; 220 rwlp->mutex.mutex_flag = LOCK_INITED; 221 rwlp->mutex.mutex_magic = MUTEX_MAGIC; 222 223 /* 224 * This should be at the beginning of the function, 225 * but for the sake of old broken applications that 226 * do not have proper alignment for their rwlocks 227 * (and don't check the return code from rwlock_init), 228 * we put it here, after initializing the rwlock regardless. 229 */ 230 if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) && 231 self->ul_misaligned == 0) 232 return (EINVAL); 233 234 return (0); 235 } 236 237 #pragma weak pthread_rwlock_destroy = rwlock_destroy 238 #pragma weak _rwlock_destroy = rwlock_destroy 239 int 240 rwlock_destroy(rwlock_t *rwlp) 241 { 242 ulwp_t *self = curthread; 243 244 /* 245 * Once destroyed, we can no longer be holding a read or write lock. 
246 * We can do nothing about other threads that are holding read locks. 247 */ 248 sigoff(self); 249 rwl_entry(rwlp)->rd_count = 0; 250 sigon(self); 251 rwlp->rwlock_magic = 0; 252 tdb_sync_obj_deregister(rwlp); 253 return (0); 254 } 255 256 /* 257 * The following four functions: 258 * read_lock_try() 259 * read_unlock_try() 260 * write_lock_try() 261 * write_unlock_try() 262 * lie at the heart of the fast-path code for rwlocks, 263 * both process-private and process-shared. 264 * 265 * They are called once without recourse to any other locking primitives. 266 * If they succeed, we are done and the fast-path code was successful. 267 * If they fail, we have to deal with lock queues, either to enqueue 268 * ourself and sleep or to dequeue and wake up someone else (slow paths). 269 * 270 * Unless 'ignore_waiters_flag' is true (a condition that applies only 271 * when read_lock_try() or write_lock_try() is called from code that 272 * is already in the slow path and has already acquired the queue lock), 273 * these functions will always fail if the waiters flag, URW_HAS_WAITERS, 274 * is set in the 'rwstate' word. Thus, setting the waiters flag on the 275 * rwlock and acquiring the queue lock guarantees exclusive access to 276 * the rwlock (and is the only way to guarantee exclusive access). 277 */ 278 279 /* 280 * Attempt to acquire a readers lock. Return true on success. 281 */ 282 static int 283 read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 284 { 285 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 286 uint32_t mask = ignore_waiters_flag? 287 URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED); 288 uint32_t readers; 289 ulwp_t *self = curthread; 290 291 no_preempt(self); 292 while (((readers = *rwstate) & mask) == 0) { 293 if (atomic_cas_32(rwstate, readers, readers + 1) == readers) { 294 preempt(self); 295 return (1); 296 } 297 } 298 preempt(self); 299 return (0); 300 } 301 302 /* 303 * Attempt to release a reader lock. 
Return true on success. 304 */ 305 static int 306 read_unlock_try(rwlock_t *rwlp) 307 { 308 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 309 uint32_t readers; 310 ulwp_t *self = curthread; 311 312 no_preempt(self); 313 while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 314 if (atomic_cas_32(rwstate, readers, readers - 1) == readers) { 315 preempt(self); 316 return (1); 317 } 318 } 319 preempt(self); 320 return (0); 321 } 322 323 /* 324 * Attempt to acquire a writer lock. Return true on success. 325 */ 326 static int 327 write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 328 { 329 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 330 uint32_t mask = ignore_waiters_flag? 331 (URW_WRITE_LOCKED | URW_READERS_MASK) : 332 (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK); 333 ulwp_t *self = curthread; 334 uint32_t readers; 335 336 no_preempt(self); 337 while (((readers = *rwstate) & mask) == 0) { 338 if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED) 339 == readers) { 340 preempt(self); 341 return (1); 342 } 343 } 344 preempt(self); 345 return (0); 346 } 347 348 /* 349 * Attempt to release a writer lock. Return true on success. 350 */ 351 static int 352 write_unlock_try(rwlock_t *rwlp) 353 { 354 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 355 uint32_t readers; 356 ulwp_t *self = curthread; 357 358 no_preempt(self); 359 while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 360 if (atomic_cas_32(rwstate, readers, 0) == readers) { 361 preempt(self); 362 return (1); 363 } 364 } 365 preempt(self); 366 return (0); 367 } 368 369 /* 370 * Release a process-private rwlock and wake up any thread(s) sleeping on it. 371 * This is called when a thread releases a lock that appears to have waiters. 
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */

	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			/*
			 * A writer is woken only if it is the first waiter
			 * selected and nobody currently holds a read lock.
			 */
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			/* a reader cannot join a writer we already chose */
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)	/* the loop ran the queue dry */
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		/* stay non-preemptible from the unlock through the unpark */
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	/* release the overflow buffer if alloc_lwpids() replaced ours */
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * rd_wr is READ_LOCK or WRITE_LOCK, optionally or'ed with TRY_FLAG.
 * tsp is handed unchanged to __lwp_rwlock_rdlock()/__lwp_rwlock_wrlock()
 * in the non-try case.  Returns 0 on success, EBUSY for a try request
 * that finds the lock write-locked, or the error from mutex_lock() or
 * the __lwp_rwlock_*() call.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
 * released, and if they need to sleep will release the mutex first. In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	int try_flag;
	int error;

	/* split the request into the try bit and the lock kind */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		/* a try request gives up at once on a write-locked lock */
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		/* one more fast-path attempt, now holding the mutex */
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		atomic_or_32(rwstate, URW_HAS_WAITERS);

#ifdef DEBUG
		uint32_t readers;
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
#endif
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 *
 * rd_wr is READ_LOCK or WRITE_LOCK, optionally or'ed with TRY_FLAG.
 * tsp is handed unchanged to __lwp_park().  Returns 0 once the lock is
 * acquired, EBUSY for a try request that would have to block, or the
 * error from __lwp_park() other than EINTR (which is treated as a
 * reason to retry).
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	/* split the request into the try bit and the lock kind */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to it or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/* an interrupted park is not an error; loop and retry */
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		/*
		 * If we are still on the sleep queue, take ourself off it.
		 * NOTE(review): a zero return from dequeue_self() is treated
		 * here as "no waiters remain" -- confirm against its
		 * definition.
		 */
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner,
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Acquire rwlp for reading on behalf of the calling thread.
 * tsp is passed unchanged to shared_rwlock_lock()/rwlock_lock() when
 * the fast path fails.
 *
 * Returns 0 on success, in which case the thread's rd_count for rwlp
 * has been incremented.  Returns EAGAIN if the thread already holds
 * READ_LOCK_MAX read locks on rwlp, EDEADLK if the thread holds the
 * writer lock, or the error from the slow-path lock routine.
 */
int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		/* the count is bumped here for new and recursive holds alike */
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

/*
 * Acquire rwlp for reading; rw_rdlock_impl() is called with a NULL
 * timespec.  Also exported as pthread_rwlock_rdlock and _rw_rdlock.
 */
#pragma weak pthread_rwlock_rdlock = rw_rdlock
#pragma weak _rw_rdlock = rw_rdlock
int
rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

/*
 * Variant of rw_rdlock() that first calls enter_critical() on the
 * current thread and discards the result of rw_rdlock_impl().
 * lrw_unlock() makes the matching exit_critical() call.
 */
void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

/*
 * Acquire rwlp for reading, passing a copy of the relative time
 * 'reltime' to rw_rdlock_impl().  Only CLOCK_REALTIME and
 * CLOCK_HIGHRES are accepted for 'clock'; any other value yields
 * EINVAL.  An ETIME result is reported to the caller as ETIMEDOUT.
 */
int
pthread_rwlock_relclockrdlock_np(pthread_rwlock_t *restrict rwlp,
    clockid_t clock, const struct timespec *restrict reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * pthread_rwlock_relclockrdlock_np() with the clock fixed to
 * CLOCK_REALTIME.
 */
int
pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict reltime)
{
	return (pthread_rwlock_relclockrdlock_np(rwlp, CLOCK_REALTIME,
	    reltime));
}

/*
 * Acquire rwlp for reading with an absolute deadline 'abstime' on
 * 'clock'.  The deadline is converted with abstime_to_reltime() and
 * the result passed to rw_rdlock_impl().  Only CLOCK_REALTIME and
 * CLOCK_HIGHRES are accepted; any other clock yields EINVAL.  An ETIME
 * result is reported to the caller as ETIMEDOUT.
 */
int
pthread_rwlock_clockrdlock(pthread_rwlock_t *restrict rwlp, clockid_t clock,
    const struct timespec *restrict abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	abstime_to_reltime(clock, abstime, &tslocal);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * pthread_rwlock_clockrdlock() with the clock fixed to CLOCK_REALTIME.
 */
int
pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict abstime)
{
	return (pthread_rwlock_clockrdlock(rwlp, CLOCK_REALTIME, abstime));
}

/*
 * Acquire rwlp for writing on behalf of the calling thread.
 * tsp is passed unchanged to shared_rwlock_lock()/rwlock_lock() when
 * the fast path fails.
 *
 * Returns 0 on success, in which case the owner (and, for
 * USYNC_PROCESS locks, the owner pid) has been recorded in the lock.
 * Returns EDEADLK if the thread already holds rwlp for reading or
 * writing, or the error from the slow-path lock routine.
 */
int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		/* record ownership; rw_write_held() checks these fields */
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

/*
 * Acquire rwlp for writing; rw_wrlock_impl() is called with a NULL
 * timespec.  Also exported as pthread_rwlock_wrlock and _rw_wrlock.
 */
#pragma weak pthread_rwlock_wrlock = rw_wrlock
#pragma weak _rw_wrlock = rw_wrlock
int
rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

/*
 * Variant of rw_wrlock() that first calls enter_critical() on the
 * current thread and discards the result of rw_wrlock_impl().
 * lrw_unlock() makes the matching exit_critical() call.
 */
void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

/*
 * Acquire rwlp for writing, passing a copy of the relative time
 * 'reltime' to rw_wrlock_impl().  Only CLOCK_REALTIME and
 * CLOCK_HIGHRES are accepted for 'clock'; any other value yields
 * EINVAL.  An ETIME result is reported to the caller as ETIMEDOUT.
 */
int
pthread_rwlock_relclockwrlock_np(pthread_rwlock_t *restrict rwlp,
    clockid_t clock, const struct timespec *restrict reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * pthread_rwlock_relclockwrlock_np() with the clock fixed to
 * CLOCK_REALTIME.
 */
int
pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict reltime)
{
	return (pthread_rwlock_relclockwrlock_np(rwlp, CLOCK_REALTIME,
	    reltime));
}
947 948 int 949 pthread_rwlock_clockwrlock(pthread_rwlock_t *rwlp, clockid_t clock, 950 const timespec_t *abstime) 951 { 952 timespec_t tslocal; 953 int error; 954 955 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 956 957 switch (clock) { 958 case CLOCK_REALTIME: 959 case CLOCK_HIGHRES: 960 break; 961 default: 962 return (EINVAL); 963 } 964 965 abstime_to_reltime(clock, abstime, &tslocal); 966 error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal); 967 if (error == ETIME) 968 error = ETIMEDOUT; 969 return (error); 970 } 971 972 int 973 pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime) 974 { 975 return (pthread_rwlock_clockwrlock(rwlp, CLOCK_REALTIME, abstime)); 976 } 977 978 #pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock 979 int 980 rw_tryrdlock(rwlock_t *rwlp) 981 { 982 ulwp_t *self = curthread; 983 uberdata_t *udp = self->ul_uberdata; 984 tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 985 readlock_t *readlockp; 986 int error; 987 988 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 989 990 if (rwsp) 991 tdb_incr(rwsp->rw_rdlock_try); 992 993 /* 994 * If we already hold a readers lock on this rwlock, 995 * just increment our reference count and return. 
996 */ 997 sigoff(self); 998 readlockp = rwl_entry(rwlp); 999 if (readlockp->rd_count != 0) { 1000 if (readlockp->rd_count == READ_LOCK_MAX) { 1001 sigon(self); 1002 error = EAGAIN; 1003 goto out; 1004 } 1005 sigon(self); 1006 error = 0; 1007 goto out; 1008 } 1009 sigon(self); 1010 1011 if (read_lock_try(rwlp, 0)) 1012 error = 0; 1013 else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 1014 error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 1015 else /* user-level */ 1016 error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 1017 1018 out: 1019 if (error == 0) { 1020 sigoff(self); 1021 rwl_entry(rwlp)->rd_count++; 1022 sigon(self); 1023 DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK); 1024 } else { 1025 if (rwsp) 1026 tdb_incr(rwsp->rw_rdlock_try_fail); 1027 if (error != EBUSY) { 1028 DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, 1029 error); 1030 } 1031 } 1032 1033 return (error); 1034 } 1035 1036 #pragma weak pthread_rwlock_trywrlock = rw_trywrlock 1037 int 1038 rw_trywrlock(rwlock_t *rwlp) 1039 { 1040 ulwp_t *self = curthread; 1041 uberdata_t *udp = self->ul_uberdata; 1042 tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 1043 int error; 1044 1045 ASSERT(!self->ul_critical || self->ul_bindflags); 1046 1047 if (rwsp) 1048 tdb_incr(rwsp->rw_wrlock_try); 1049 1050 if (write_lock_try(rwlp, 0)) 1051 error = 0; 1052 else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 1053 error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 1054 else /* user-level */ 1055 error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 1056 1057 if (error == 0) { 1058 rwlp->rwlock_owner = (uintptr_t)self; 1059 if (rwlp->rwlock_type == USYNC_PROCESS) 1060 rwlp->rwlock_ownerpid = udp->pid; 1061 if (rwsp) 1062 rwsp->rw_wrlock_begin_hold = gethrtime(); 1063 DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK); 1064 } else { 1065 if (rwsp) 1066 tdb_incr(rwsp->rw_wrlock_try_fail); 1067 if (error != EBUSY) { 1068 DTRACE_PROBE3(plockstat, rw__error, rwlp, 
WRITE_LOCK, 1069 error); 1070 } 1071 } 1072 return (error); 1073 } 1074 1075 #pragma weak pthread_rwlock_unlock = rw_unlock 1076 #pragma weak _rw_unlock = rw_unlock 1077 int 1078 rw_unlock(rwlock_t *rwlp) 1079 { 1080 volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 1081 uint32_t readers; 1082 ulwp_t *self = curthread; 1083 uberdata_t *udp = self->ul_uberdata; 1084 tdb_rwlock_stats_t *rwsp; 1085 int rd_wr; 1086 1087 readers = *rwstate; 1088 ASSERT_CONSISTENT_STATE(readers); 1089 if (readers & URW_WRITE_LOCKED) { 1090 rd_wr = WRITE_LOCK; 1091 readers = 0; 1092 } else { 1093 rd_wr = READ_LOCK; 1094 readers &= URW_READERS_MASK; 1095 } 1096 1097 if (rd_wr == WRITE_LOCK) { 1098 /* 1099 * Since the writer lock is held, we'd better be 1100 * holding it, else we cannot legitimately be here. 1101 */ 1102 if (!rw_write_held(rwlp)) { 1103 if (self->ul_error_detection) 1104 rwlock_error(rwlp, "rwlock_unlock", 1105 "writer lock held, " 1106 "but not by the calling thread"); 1107 return (EPERM); 1108 } 1109 if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) { 1110 if (rwsp->rw_wrlock_begin_hold) 1111 rwsp->rw_wrlock_hold_time += 1112 gethrtime() - rwsp->rw_wrlock_begin_hold; 1113 rwsp->rw_wrlock_begin_hold = 0; 1114 } 1115 rwlp->rwlock_owner = 0; 1116 rwlp->rwlock_ownerpid = 0; 1117 } else if (readers > 0) { 1118 /* 1119 * A readers lock is held; if we don't hold one, bail out. 1120 */ 1121 readlock_t *readlockp; 1122 1123 sigoff(self); 1124 readlockp = rwl_entry(rwlp); 1125 if (readlockp->rd_count == 0) { 1126 sigon(self); 1127 if (self->ul_error_detection) 1128 rwlock_error(rwlp, "rwlock_unlock", 1129 "readers lock held, " 1130 "but not by the calling thread"); 1131 return (EPERM); 1132 } 1133 /* 1134 * If we hold more than one readers lock on this rwlock, 1135 * just decrement our reference count and return. 
1136 */ 1137 if (--readlockp->rd_count != 0) { 1138 sigon(self); 1139 goto out; 1140 } 1141 sigon(self); 1142 } else { 1143 /* 1144 * This is a usage error. 1145 * No thread should release an unowned lock. 1146 */ 1147 if (self->ul_error_detection) 1148 rwlock_error(rwlp, "rwlock_unlock", "lock not owned"); 1149 return (EPERM); 1150 } 1151 1152 if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) { 1153 /* EMPTY */; 1154 } else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) { 1155 /* EMPTY */; 1156 } else if (rwlp->rwlock_type == USYNC_PROCESS) { 1157 (void) mutex_lock(&rwlp->mutex); 1158 (void) __lwp_rwlock_unlock(rwlp); 1159 (void) mutex_unlock(&rwlp->mutex); 1160 } else { 1161 rw_queue_release(rwlp); 1162 } 1163 1164 out: 1165 DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr); 1166 return (0); 1167 } 1168 1169 void 1170 lrw_unlock(rwlock_t *rwlp) 1171 { 1172 (void) rw_unlock(rwlp); 1173 exit_critical(curthread); 1174 } 1175