17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*41efec22Sraf * Common Development and Distribution License (the "License"). 6*41efec22Sraf * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21*41efec22Sraf 227c478bd9Sstevel@tonic-gate /* 23*41efec22Sraf * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate #include "lint.h" 307c478bd9Sstevel@tonic-gate #include "thr_uberdata.h" 317c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 327c478bd9Sstevel@tonic-gate 337c478bd9Sstevel@tonic-gate #define TRY_FLAG 0x10 347c478bd9Sstevel@tonic-gate #define READ_LOCK 0 357c478bd9Sstevel@tonic-gate #define WRITE_LOCK 1 367c478bd9Sstevel@tonic-gate #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 377c478bd9Sstevel@tonic-gate #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 387c478bd9Sstevel@tonic-gate 397c478bd9Sstevel@tonic-gate #define NLOCKS 4 /* initial number of readlock_t structs allocated */ 407c478bd9Sstevel@tonic-gate 41*41efec22Sraf #define ASSERT_CONSISTENT_STATE(readers) \ 42*41efec22Sraf ASSERT(!((readers) & URW_WRITE_LOCKED) || \ 43*41efec22Sraf ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED) 44*41efec22Sraf 457c478bd9Sstevel@tonic-gate /* 467c478bd9Sstevel@tonic-gate * Find/allocate an entry for rwlp in our array of rwlocks held for reading. 47*41efec22Sraf * We must be deferring signals for this to be safe. 48*41efec22Sraf * Else if we are returning an entry with ul_rdlocks == 0, 49*41efec22Sraf * it could be reassigned behind our back in a signal handler. 507c478bd9Sstevel@tonic-gate */ 517c478bd9Sstevel@tonic-gate static readlock_t * 527c478bd9Sstevel@tonic-gate rwl_entry(rwlock_t *rwlp) 537c478bd9Sstevel@tonic-gate { 547c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 557c478bd9Sstevel@tonic-gate readlock_t *remembered = NULL; 567c478bd9Sstevel@tonic-gate readlock_t *readlockp; 577c478bd9Sstevel@tonic-gate uint_t nlocks; 587c478bd9Sstevel@tonic-gate 59*41efec22Sraf /* we must be deferring signals */ 60*41efec22Sraf ASSERT((self->ul_critical + self->ul_sigdefer) != 0); 61*41efec22Sraf 627c478bd9Sstevel@tonic-gate if ((nlocks = self->ul_rdlocks) != 0) 637c478bd9Sstevel@tonic-gate readlockp = self->ul_readlock.array; 647c478bd9Sstevel@tonic-gate else { 657c478bd9Sstevel@tonic-gate nlocks = 1; 667c478bd9Sstevel@tonic-gate readlockp = &self->ul_readlock.single; 677c478bd9Sstevel@tonic-gate } 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate for (; nlocks; nlocks--, readlockp++) { 707c478bd9Sstevel@tonic-gate if (readlockp->rd_rwlock == rwlp) 717c478bd9Sstevel@tonic-gate return (readlockp); 727c478bd9Sstevel@tonic-gate if (readlockp->rd_count == 0 && remembered == NULL) 737c478bd9Sstevel@tonic-gate remembered = readlockp; 747c478bd9Sstevel@tonic-gate } 757c478bd9Sstevel@tonic-gate if (remembered != NULL) { 767c478bd9Sstevel@tonic-gate remembered->rd_rwlock = rwlp; 777c478bd9Sstevel@tonic-gate return (remembered); 787c478bd9Sstevel@tonic-gate } 797c478bd9Sstevel@tonic-gate 807c478bd9Sstevel@tonic-gate /* 817c478bd9Sstevel@tonic-gate * No entry available. Allocate more space, converting the single 827c478bd9Sstevel@tonic-gate * readlock_t entry into an array of readlock_t entries if necessary. 837c478bd9Sstevel@tonic-gate */ 847c478bd9Sstevel@tonic-gate if ((nlocks = self->ul_rdlocks) == 0) { 857c478bd9Sstevel@tonic-gate /* 867c478bd9Sstevel@tonic-gate * Initial allocation of the readlock_t array. 877c478bd9Sstevel@tonic-gate * Convert the single entry into an array. 887c478bd9Sstevel@tonic-gate */ 897c478bd9Sstevel@tonic-gate self->ul_rdlocks = nlocks = NLOCKS; 907c478bd9Sstevel@tonic-gate readlockp = lmalloc(nlocks * sizeof (readlock_t)); 917c478bd9Sstevel@tonic-gate /* 927c478bd9Sstevel@tonic-gate * The single readlock_t becomes the first entry in the array. 937c478bd9Sstevel@tonic-gate */ 947c478bd9Sstevel@tonic-gate *readlockp = self->ul_readlock.single; 957c478bd9Sstevel@tonic-gate self->ul_readlock.single.rd_count = 0; 967c478bd9Sstevel@tonic-gate self->ul_readlock.array = readlockp; 977c478bd9Sstevel@tonic-gate /* 987c478bd9Sstevel@tonic-gate * Return the next available entry in the array. 997c478bd9Sstevel@tonic-gate */ 1007c478bd9Sstevel@tonic-gate (++readlockp)->rd_rwlock = rwlp; 1017c478bd9Sstevel@tonic-gate return (readlockp); 1027c478bd9Sstevel@tonic-gate } 1037c478bd9Sstevel@tonic-gate /* 1047c478bd9Sstevel@tonic-gate * Reallocate the array, double the size each time. 1057c478bd9Sstevel@tonic-gate */ 1067c478bd9Sstevel@tonic-gate readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t)); 1077c478bd9Sstevel@tonic-gate (void) _memcpy(readlockp, self->ul_readlock.array, 1087c478bd9Sstevel@tonic-gate nlocks * sizeof (readlock_t)); 1097c478bd9Sstevel@tonic-gate lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t)); 1107c478bd9Sstevel@tonic-gate self->ul_readlock.array = readlockp; 1117c478bd9Sstevel@tonic-gate self->ul_rdlocks *= 2; 1127c478bd9Sstevel@tonic-gate /* 1137c478bd9Sstevel@tonic-gate * Return the next available entry in the newly allocated array. 1147c478bd9Sstevel@tonic-gate */ 1157c478bd9Sstevel@tonic-gate (readlockp += nlocks)->rd_rwlock = rwlp; 1167c478bd9Sstevel@tonic-gate return (readlockp); 1177c478bd9Sstevel@tonic-gate } 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate /* 1207c478bd9Sstevel@tonic-gate * Free the array of rwlocks held for reading. 1217c478bd9Sstevel@tonic-gate */ 1227c478bd9Sstevel@tonic-gate void 1237c478bd9Sstevel@tonic-gate rwl_free(ulwp_t *ulwp) 1247c478bd9Sstevel@tonic-gate { 1257c478bd9Sstevel@tonic-gate uint_t nlocks; 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate if ((nlocks = ulwp->ul_rdlocks) != 0) 1287c478bd9Sstevel@tonic-gate lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t)); 1297c478bd9Sstevel@tonic-gate ulwp->ul_rdlocks = 0; 1307c478bd9Sstevel@tonic-gate ulwp->ul_readlock.single.rd_rwlock = NULL; 1317c478bd9Sstevel@tonic-gate ulwp->ul_readlock.single.rd_count = 0; 1327c478bd9Sstevel@tonic-gate } 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate /* 1357c478bd9Sstevel@tonic-gate * Check if a reader version of the lock is held by the current thread. 1367c478bd9Sstevel@tonic-gate * rw_read_is_held() is private to libc. 1377c478bd9Sstevel@tonic-gate */ 1387c478bd9Sstevel@tonic-gate #pragma weak rw_read_is_held = _rw_read_held 1397c478bd9Sstevel@tonic-gate #pragma weak rw_read_held = _rw_read_held 1407c478bd9Sstevel@tonic-gate int 1417c478bd9Sstevel@tonic-gate _rw_read_held(rwlock_t *rwlp) 1427c478bd9Sstevel@tonic-gate { 143*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 144*41efec22Sraf uint32_t readers; 145*41efec22Sraf ulwp_t *self = curthread; 1467c478bd9Sstevel@tonic-gate readlock_t *readlockp; 1477c478bd9Sstevel@tonic-gate uint_t nlocks; 148*41efec22Sraf int rval = 0; 1497c478bd9Sstevel@tonic-gate 150*41efec22Sraf no_preempt(self); 1517c478bd9Sstevel@tonic-gate 152*41efec22Sraf readers = *rwstate; 153*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 154*41efec22Sraf if (!(readers & URW_WRITE_LOCKED) && 155*41efec22Sraf (readers & URW_READERS_MASK) != 0) { 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * The lock is held for reading by some thread. 1587c478bd9Sstevel@tonic-gate * Search our array of rwlocks held for reading for a match. 1597c478bd9Sstevel@tonic-gate */ 1607c478bd9Sstevel@tonic-gate if ((nlocks = self->ul_rdlocks) != 0) 1617c478bd9Sstevel@tonic-gate readlockp = self->ul_readlock.array; 1627c478bd9Sstevel@tonic-gate else { 1637c478bd9Sstevel@tonic-gate nlocks = 1; 1647c478bd9Sstevel@tonic-gate readlockp = &self->ul_readlock.single; 1657c478bd9Sstevel@tonic-gate } 166*41efec22Sraf for (; nlocks; nlocks--, readlockp++) { 167*41efec22Sraf if (readlockp->rd_rwlock == rwlp) { 168*41efec22Sraf if (readlockp->rd_count) 169*41efec22Sraf rval = 1; 170*41efec22Sraf break; 171*41efec22Sraf } 172*41efec22Sraf } 173*41efec22Sraf } 1747c478bd9Sstevel@tonic-gate 175*41efec22Sraf preempt(self); 176*41efec22Sraf return (rval); 1777c478bd9Sstevel@tonic-gate } 1787c478bd9Sstevel@tonic-gate 1797c478bd9Sstevel@tonic-gate /* 1807c478bd9Sstevel@tonic-gate * Check if a writer version of the lock is held by the current thread. 1817c478bd9Sstevel@tonic-gate * rw_write_is_held() is private to libc. 1827c478bd9Sstevel@tonic-gate */ 1837c478bd9Sstevel@tonic-gate #pragma weak rw_write_is_held = _rw_write_held 1847c478bd9Sstevel@tonic-gate #pragma weak rw_write_held = _rw_write_held 1857c478bd9Sstevel@tonic-gate int 1867c478bd9Sstevel@tonic-gate _rw_write_held(rwlock_t *rwlp) 1877c478bd9Sstevel@tonic-gate { 188*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 189*41efec22Sraf uint32_t readers; 1907c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 191*41efec22Sraf int rval; 1927c478bd9Sstevel@tonic-gate 193*41efec22Sraf no_preempt(self); 1947c478bd9Sstevel@tonic-gate 195*41efec22Sraf readers = *rwstate; 196*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 197*41efec22Sraf rval = ((readers & URW_WRITE_LOCKED) && 198*41efec22Sraf rwlp->rwlock_owner == (uintptr_t)self && 199*41efec22Sraf (rwlp->rwlock_type == USYNC_THREAD || 200*41efec22Sraf rwlp->rwlock_ownerpid == self->ul_uberdata->pid)); 201*41efec22Sraf 202*41efec22Sraf preempt(self); 203*41efec22Sraf return (rval); 2047c478bd9Sstevel@tonic-gate } 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate #pragma weak rwlock_init = __rwlock_init 2077c478bd9Sstevel@tonic-gate #pragma weak _rwlock_init = __rwlock_init 2087c478bd9Sstevel@tonic-gate /* ARGSUSED2 */ 2097c478bd9Sstevel@tonic-gate int 2107c478bd9Sstevel@tonic-gate __rwlock_init(rwlock_t *rwlp, int type, void *arg) 2117c478bd9Sstevel@tonic-gate { 2127c478bd9Sstevel@tonic-gate if (type != USYNC_THREAD && type != USYNC_PROCESS) 2137c478bd9Sstevel@tonic-gate return (EINVAL); 2147c478bd9Sstevel@tonic-gate /* 2157c478bd9Sstevel@tonic-gate * Once reinitialized, we can no longer be holding a read or write lock. 2167c478bd9Sstevel@tonic-gate * We can do nothing about other threads that are holding read locks. 2177c478bd9Sstevel@tonic-gate */ 218*41efec22Sraf sigoff(curthread); 2197c478bd9Sstevel@tonic-gate rwl_entry(rwlp)->rd_count = 0; 220*41efec22Sraf sigon(curthread); 2217c478bd9Sstevel@tonic-gate (void) _memset(rwlp, 0, sizeof (*rwlp)); 2227c478bd9Sstevel@tonic-gate rwlp->rwlock_type = (uint16_t)type; 2237c478bd9Sstevel@tonic-gate rwlp->rwlock_magic = RWL_MAGIC; 2247c478bd9Sstevel@tonic-gate rwlp->mutex.mutex_type = (uint8_t)type; 2257c478bd9Sstevel@tonic-gate rwlp->mutex.mutex_flag = LOCK_INITED; 2267c478bd9Sstevel@tonic-gate rwlp->mutex.mutex_magic = MUTEX_MAGIC; 2277c478bd9Sstevel@tonic-gate return (0); 2287c478bd9Sstevel@tonic-gate } 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate #pragma weak rwlock_destroy = __rwlock_destroy 2317c478bd9Sstevel@tonic-gate #pragma weak _rwlock_destroy = __rwlock_destroy 2327c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_destroy = __rwlock_destroy 2337c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_destroy = __rwlock_destroy 2347c478bd9Sstevel@tonic-gate int 2357c478bd9Sstevel@tonic-gate __rwlock_destroy(rwlock_t *rwlp) 2367c478bd9Sstevel@tonic-gate { 2377c478bd9Sstevel@tonic-gate /* 2387c478bd9Sstevel@tonic-gate * Once destroyed, we can no longer be holding a read or write lock. 2397c478bd9Sstevel@tonic-gate * We can do nothing about other threads that are holding read locks. 2407c478bd9Sstevel@tonic-gate */ 241*41efec22Sraf sigoff(curthread); 2427c478bd9Sstevel@tonic-gate rwl_entry(rwlp)->rd_count = 0; 243*41efec22Sraf sigon(curthread); 2447c478bd9Sstevel@tonic-gate rwlp->rwlock_magic = 0; 2457c478bd9Sstevel@tonic-gate tdb_sync_obj_deregister(rwlp); 2467c478bd9Sstevel@tonic-gate return (0); 2477c478bd9Sstevel@tonic-gate } 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate /* 250*41efec22Sraf * Attempt to acquire a readers lock. Return true on success. 2517c478bd9Sstevel@tonic-gate */ 2527c478bd9Sstevel@tonic-gate static int 253*41efec22Sraf read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 2547c478bd9Sstevel@tonic-gate { 255*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 256*41efec22Sraf uint32_t mask = ignore_waiters_flag? 257*41efec22Sraf URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED); 258*41efec22Sraf uint32_t readers; 2597c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 2607c478bd9Sstevel@tonic-gate 2617c478bd9Sstevel@tonic-gate no_preempt(self); 262*41efec22Sraf while (((readers = *rwstate) & mask) == 0) { 263*41efec22Sraf if (atomic_cas_32(rwstate, readers, readers + 1) == readers) { 2647c478bd9Sstevel@tonic-gate preempt(self); 2657c478bd9Sstevel@tonic-gate return (1); 2667c478bd9Sstevel@tonic-gate } 2677c478bd9Sstevel@tonic-gate } 268*41efec22Sraf preempt(self); 269*41efec22Sraf return (0); 2707c478bd9Sstevel@tonic-gate } 271*41efec22Sraf 272*41efec22Sraf /* 273*41efec22Sraf * Attempt to release a reader lock. Return true on success. 274*41efec22Sraf */ 275*41efec22Sraf static int 276*41efec22Sraf read_unlock_try(rwlock_t *rwlp) 277*41efec22Sraf { 278*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 279*41efec22Sraf uint32_t readers; 280*41efec22Sraf ulwp_t *self = curthread; 281*41efec22Sraf 282*41efec22Sraf no_preempt(self); 283*41efec22Sraf while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 284*41efec22Sraf if (atomic_cas_32(rwstate, readers, readers - 1) == readers) { 285*41efec22Sraf preempt(self); 286*41efec22Sraf return (1); 287*41efec22Sraf } 288*41efec22Sraf } 289*41efec22Sraf preempt(self); 290*41efec22Sraf return (0); 291*41efec22Sraf } 292*41efec22Sraf 293*41efec22Sraf /* 294*41efec22Sraf * Attempt to acquire a writer lock. Return true on success. 295*41efec22Sraf */ 296*41efec22Sraf static int 297*41efec22Sraf write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 298*41efec22Sraf { 299*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 300*41efec22Sraf uint32_t mask = ignore_waiters_flag? 301*41efec22Sraf (URW_WRITE_LOCKED | URW_READERS_MASK) : 302*41efec22Sraf (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK); 303*41efec22Sraf ulwp_t *self = curthread; 304*41efec22Sraf uint32_t readers; 305*41efec22Sraf 306*41efec22Sraf no_preempt(self); 307*41efec22Sraf while (((readers = *rwstate) & mask) == 0) { 308*41efec22Sraf if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED) 309*41efec22Sraf == readers) { 310*41efec22Sraf preempt(self); 311*41efec22Sraf return (1); 312*41efec22Sraf } 313*41efec22Sraf } 314*41efec22Sraf preempt(self); 315*41efec22Sraf return (0); 316*41efec22Sraf } 317*41efec22Sraf 318*41efec22Sraf /* 319*41efec22Sraf * Attempt to release a writer lock. Return true on success. 320*41efec22Sraf */ 321*41efec22Sraf static int 322*41efec22Sraf write_unlock_try(rwlock_t *rwlp) 323*41efec22Sraf { 324*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 325*41efec22Sraf uint32_t readers; 326*41efec22Sraf ulwp_t *self = curthread; 327*41efec22Sraf 328*41efec22Sraf no_preempt(self); 329*41efec22Sraf while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 330*41efec22Sraf if (atomic_cas_32(rwstate, readers, 0) == readers) { 331*41efec22Sraf preempt(self); 332*41efec22Sraf return (1); 333*41efec22Sraf } 334*41efec22Sraf } 335*41efec22Sraf preempt(self); 336*41efec22Sraf return (0); 337*41efec22Sraf } 338*41efec22Sraf 339*41efec22Sraf /* 340*41efec22Sraf * Wake up thread(s) sleeping on the rwlock queue and then 341*41efec22Sraf * drop the queue lock. Return non-zero if we wake up someone. 342*41efec22Sraf * This is called when a thread releases a lock that appears to have waiters. 343*41efec22Sraf */ 344*41efec22Sraf static int 345*41efec22Sraf rw_queue_release(queue_head_t *qp, rwlock_t *rwlp) 346*41efec22Sraf { 347*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 348*41efec22Sraf uint32_t readers; 349*41efec22Sraf uint32_t writers; 350*41efec22Sraf int nlwpid = 0; 351*41efec22Sraf int maxlwps = MAXLWPS; 352*41efec22Sraf ulwp_t *self; 353*41efec22Sraf ulwp_t **ulwpp; 354*41efec22Sraf ulwp_t *ulwp; 355*41efec22Sraf ulwp_t *prev = NULL; 356*41efec22Sraf lwpid_t buffer[MAXLWPS]; 357*41efec22Sraf lwpid_t *lwpid = buffer; 358*41efec22Sraf 359*41efec22Sraf readers = *rwstate; 360*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 361*41efec22Sraf if (!(readers & URW_HAS_WAITERS)) { 3627c478bd9Sstevel@tonic-gate queue_unlock(qp); 3637c478bd9Sstevel@tonic-gate return (0); 3647c478bd9Sstevel@tonic-gate } 365*41efec22Sraf readers &= URW_READERS_MASK; 366*41efec22Sraf writers = 0; 367*41efec22Sraf 368*41efec22Sraf /* 369*41efec22Sraf * Walk the list of waiters and prepare to wake up as 370*41efec22Sraf * many readers as we encounter before encountering 371*41efec22Sraf * a writer. If the first thread on the list is a 372*41efec22Sraf * writer, stop there and wake it up. 373*41efec22Sraf * 374*41efec22Sraf * We keep track of lwpids that are to be unparked in lwpid[]. 375*41efec22Sraf * __lwp_unpark_all() is called to unpark all of them after 376*41efec22Sraf * they have been removed from the sleep queue and the sleep 377*41efec22Sraf * queue lock has been dropped. If we run out of space in our 378*41efec22Sraf * on-stack buffer, we need to allocate more but we can't call 379*41efec22Sraf * lmalloc() because we are holding a queue lock when the overflow 380*41efec22Sraf * occurs and lmalloc() acquires a lock. We can't use alloca() 381*41efec22Sraf * either because the application may have allocated a small 382*41efec22Sraf * stack and we don't want to overrun the stack. So we call 383*41efec22Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 384*41efec22Sraf * system call directly since that path acquires no locks. 385*41efec22Sraf */ 386*41efec22Sraf ulwpp = &qp->qh_head; 387*41efec22Sraf while ((ulwp = *ulwpp) != NULL) { 388*41efec22Sraf if (ulwp->ul_wchan != rwlp) { 389*41efec22Sraf prev = ulwp; 390*41efec22Sraf ulwpp = &ulwp->ul_link; 391*41efec22Sraf continue; 392*41efec22Sraf } 393*41efec22Sraf if (ulwp->ul_writer) { 394*41efec22Sraf if (writers != 0 || readers != 0) 395*41efec22Sraf break; 396*41efec22Sraf /* one writer to wake */ 397*41efec22Sraf writers++; 398*41efec22Sraf } else { 399*41efec22Sraf if (writers != 0) 400*41efec22Sraf break; 401*41efec22Sraf /* at least one reader to wake */ 402*41efec22Sraf readers++; 403*41efec22Sraf if (nlwpid == maxlwps) 404*41efec22Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 405*41efec22Sraf } 406*41efec22Sraf (void) queue_unlink(qp, ulwpp, prev); 407*41efec22Sraf lwpid[nlwpid++] = ulwp->ul_lwpid; 408*41efec22Sraf } 409*41efec22Sraf if (ulwp == NULL) 410*41efec22Sraf atomic_and_32(rwstate, ~URW_HAS_WAITERS); 411*41efec22Sraf if (nlwpid == 0) { 412*41efec22Sraf queue_unlock(qp); 413*41efec22Sraf } else { 414*41efec22Sraf self = curthread; 415*41efec22Sraf no_preempt(self); 416*41efec22Sraf queue_unlock(qp); 417*41efec22Sraf if (nlwpid == 1) 418*41efec22Sraf (void) __lwp_unpark(lwpid[0]); 419*41efec22Sraf else 420*41efec22Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 421*41efec22Sraf preempt(self); 422*41efec22Sraf } 423*41efec22Sraf if (lwpid != buffer) 424*41efec22Sraf (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 425*41efec22Sraf return (nlwpid != 0); 426*41efec22Sraf } 4277c478bd9Sstevel@tonic-gate 4287c478bd9Sstevel@tonic-gate /* 4297c478bd9Sstevel@tonic-gate * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock, 4307c478bd9Sstevel@tonic-gate * and trywrlock for process-shared (USYNC_PROCESS) rwlocks. 4317c478bd9Sstevel@tonic-gate * 4327c478bd9Sstevel@tonic-gate * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock() 4337c478bd9Sstevel@tonic-gate * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex 4347c478bd9Sstevel@tonic-gate * released, and if they need to sleep will release the mutex first. In the 4357c478bd9Sstevel@tonic-gate * event of a spurious wakeup, these will return EAGAIN (because it is much 4367c478bd9Sstevel@tonic-gate * easier for us to re-acquire the mutex here). 4377c478bd9Sstevel@tonic-gate */ 4387c478bd9Sstevel@tonic-gate int 4397c478bd9Sstevel@tonic-gate shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr) 4407c478bd9Sstevel@tonic-gate { 441*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 442*41efec22Sraf mutex_t *mp = &rwlp->mutex; 443*41efec22Sraf /* LINTED set but not used */ 444*41efec22Sraf uint32_t readers; 4457c478bd9Sstevel@tonic-gate int try_flag; 446*41efec22Sraf int error; 4477c478bd9Sstevel@tonic-gate 4487c478bd9Sstevel@tonic-gate try_flag = (rd_wr & TRY_FLAG); 4497c478bd9Sstevel@tonic-gate rd_wr &= ~TRY_FLAG; 4507c478bd9Sstevel@tonic-gate ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK); 4517c478bd9Sstevel@tonic-gate 4527c478bd9Sstevel@tonic-gate if (!try_flag) { 4537c478bd9Sstevel@tonic-gate DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr); 4547c478bd9Sstevel@tonic-gate } 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate do { 457*41efec22Sraf if (try_flag && (*rwstate & URW_WRITE_LOCKED)) { 458*41efec22Sraf error = EBUSY; 4597c478bd9Sstevel@tonic-gate break; 460*41efec22Sraf } 461*41efec22Sraf if ((error = _private_mutex_lock(mp)) != 0) 462*41efec22Sraf break; 4637c478bd9Sstevel@tonic-gate if (rd_wr == READ_LOCK) { 464*41efec22Sraf if (read_lock_try(rwlp, 0)) { 465*41efec22Sraf (void) _private_mutex_unlock(mp); 466*41efec22Sraf break; 4677c478bd9Sstevel@tonic-gate } 4687c478bd9Sstevel@tonic-gate } else { 469*41efec22Sraf if (write_lock_try(rwlp, 0)) { 470*41efec22Sraf (void) _private_mutex_unlock(mp); 471*41efec22Sraf break; 4727c478bd9Sstevel@tonic-gate } 473*41efec22Sraf } 474*41efec22Sraf atomic_or_32(rwstate, URW_HAS_WAITERS); 475*41efec22Sraf readers = *rwstate; 476*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 4777c478bd9Sstevel@tonic-gate /* 478*41efec22Sraf * The calls to __lwp_rwlock_*() below will release the mutex, 479*41efec22Sraf * so we need a dtrace probe here. 4807c478bd9Sstevel@tonic-gate */ 481*41efec22Sraf mp->mutex_owner = 0; 482*41efec22Sraf DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 4837c478bd9Sstevel@tonic-gate /* 4847c478bd9Sstevel@tonic-gate * The waiters bit may be inaccurate. 4857c478bd9Sstevel@tonic-gate * Only the kernel knows for sure. 4867c478bd9Sstevel@tonic-gate */ 487*41efec22Sraf if (rd_wr == READ_LOCK) { 488*41efec22Sraf if (try_flag) 489*41efec22Sraf error = __lwp_rwlock_tryrdlock(rwlp); 490*41efec22Sraf else 491*41efec22Sraf error = __lwp_rwlock_rdlock(rwlp, tsp); 4927c478bd9Sstevel@tonic-gate } else { 493*41efec22Sraf if (try_flag) 494*41efec22Sraf error = __lwp_rwlock_trywrlock(rwlp); 495*41efec22Sraf else 4967c478bd9Sstevel@tonic-gate error = __lwp_rwlock_wrlock(rwlp, tsp); 4977c478bd9Sstevel@tonic-gate } 498*41efec22Sraf } while (error == EAGAIN || error == EINTR); 4997c478bd9Sstevel@tonic-gate 5007c478bd9Sstevel@tonic-gate if (!try_flag) { 501*41efec22Sraf DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0); 5027c478bd9Sstevel@tonic-gate } 5037c478bd9Sstevel@tonic-gate 5047c478bd9Sstevel@tonic-gate return (error); 5057c478bd9Sstevel@tonic-gate } 5067c478bd9Sstevel@tonic-gate 5077c478bd9Sstevel@tonic-gate /* 5087c478bd9Sstevel@tonic-gate * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock, 5097c478bd9Sstevel@tonic-gate * and trywrlock for process-private (USYNC_THREAD) rwlocks. 5107c478bd9Sstevel@tonic-gate */ 5117c478bd9Sstevel@tonic-gate int 5127c478bd9Sstevel@tonic-gate rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr) 5137c478bd9Sstevel@tonic-gate { 514*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 515*41efec22Sraf uint32_t readers; 5167c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 5177c478bd9Sstevel@tonic-gate queue_head_t *qp; 5187c478bd9Sstevel@tonic-gate ulwp_t *ulwp; 5197c478bd9Sstevel@tonic-gate int try_flag; 5207c478bd9Sstevel@tonic-gate int error = 0; 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate try_flag = (rd_wr & TRY_FLAG); 5237c478bd9Sstevel@tonic-gate rd_wr &= ~TRY_FLAG; 5247c478bd9Sstevel@tonic-gate ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK); 5257c478bd9Sstevel@tonic-gate 5267c478bd9Sstevel@tonic-gate if (!try_flag) { 5277c478bd9Sstevel@tonic-gate DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr); 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate 5307c478bd9Sstevel@tonic-gate qp = queue_lock(rwlp, MX); 531*41efec22Sraf retry: 5327c478bd9Sstevel@tonic-gate while (error == 0) { 533*41efec22Sraf if (rd_wr == READ_LOCK) { 534*41efec22Sraf if (read_lock_try(rwlp, 0)) 535*41efec22Sraf goto out; 536*41efec22Sraf } else { 537*41efec22Sraf if (write_lock_try(rwlp, 0)) 538*41efec22Sraf goto out; 539*41efec22Sraf } 540*41efec22Sraf atomic_or_32(rwstate, URW_HAS_WAITERS); 541*41efec22Sraf readers = *rwstate; 542*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 543*41efec22Sraf if ((readers & URW_WRITE_LOCKED) || 544*41efec22Sraf (rd_wr == WRITE_LOCK && 545*41efec22Sraf (readers & URW_READERS_MASK) != 0)) 5467c478bd9Sstevel@tonic-gate /* EMPTY */; /* somebody holds the lock */ 5477c478bd9Sstevel@tonic-gate else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) { 548*41efec22Sraf atomic_and_32(rwstate, ~URW_HAS_WAITERS); 5497c478bd9Sstevel@tonic-gate break; /* no queued waiters */ 5507c478bd9Sstevel@tonic-gate } else { 5517c478bd9Sstevel@tonic-gate int our_pri = real_priority(self); 5527c478bd9Sstevel@tonic-gate int his_pri = real_priority(ulwp); 5537c478bd9Sstevel@tonic-gate 5547c478bd9Sstevel@tonic-gate if (rd_wr == WRITE_LOCK) { 5557c478bd9Sstevel@tonic-gate /* 5567c478bd9Sstevel@tonic-gate * We defer to a queued thread that has 5577c478bd9Sstevel@tonic-gate * a higher priority than ours. 5587c478bd9Sstevel@tonic-gate */ 5597c478bd9Sstevel@tonic-gate if (his_pri <= our_pri) 5607c478bd9Sstevel@tonic-gate break; 5617c478bd9Sstevel@tonic-gate } else { 5627c478bd9Sstevel@tonic-gate /* 5637c478bd9Sstevel@tonic-gate * We defer to a queued thread that has 5647c478bd9Sstevel@tonic-gate * a higher priority than ours or that 5657c478bd9Sstevel@tonic-gate * is a writer whose priority equals ours. 5667c478bd9Sstevel@tonic-gate */ 5677c478bd9Sstevel@tonic-gate if (his_pri < our_pri || 5687c478bd9Sstevel@tonic-gate (his_pri == our_pri && !ulwp->ul_writer)) 5697c478bd9Sstevel@tonic-gate break; 5707c478bd9Sstevel@tonic-gate } 5717c478bd9Sstevel@tonic-gate } 5727c478bd9Sstevel@tonic-gate /* 5737c478bd9Sstevel@tonic-gate * We are about to block. 5747c478bd9Sstevel@tonic-gate * If we're doing a trylock, return EBUSY instead. 5757c478bd9Sstevel@tonic-gate */ 5767c478bd9Sstevel@tonic-gate if (try_flag) { 5777c478bd9Sstevel@tonic-gate error = EBUSY; 5787c478bd9Sstevel@tonic-gate break; 5797c478bd9Sstevel@tonic-gate } 5807c478bd9Sstevel@tonic-gate /* 5817c478bd9Sstevel@tonic-gate * Enqueue writers ahead of readers of the 5827c478bd9Sstevel@tonic-gate * same priority. 5837c478bd9Sstevel@tonic-gate */ 5847c478bd9Sstevel@tonic-gate self->ul_writer = rd_wr; /* *must* be 0 or 1 */ 5857c478bd9Sstevel@tonic-gate enqueue(qp, self, rwlp, MX); 5867c478bd9Sstevel@tonic-gate set_parking_flag(self, 1); 5877c478bd9Sstevel@tonic-gate queue_unlock(qp); 5887c478bd9Sstevel@tonic-gate if ((error = __lwp_park(tsp, 0)) == EINTR) 5897c478bd9Sstevel@tonic-gate error = 0; 5907c478bd9Sstevel@tonic-gate self->ul_writer = 0; 5917c478bd9Sstevel@tonic-gate set_parking_flag(self, 0); 5927c478bd9Sstevel@tonic-gate qp = queue_lock(rwlp, MX); 593*41efec22Sraf if (self->ul_sleepq && dequeue_self(qp, rwlp) == 0) 594*41efec22Sraf atomic_and_32(rwstate, ~URW_HAS_WAITERS); 5957c478bd9Sstevel@tonic-gate } 5967c478bd9Sstevel@tonic-gate 5977c478bd9Sstevel@tonic-gate if (error == 0) { 598*41efec22Sraf if (rd_wr == READ_LOCK) { 599*41efec22Sraf if (!read_lock_try(rwlp, 1)) 600*41efec22Sraf goto retry; 601*41efec22Sraf } else { 602*41efec22Sraf if (!write_lock_try(rwlp, 1)) 603*41efec22Sraf goto retry; 6047c478bd9Sstevel@tonic-gate } 6057c478bd9Sstevel@tonic-gate } 6067c478bd9Sstevel@tonic-gate 607*41efec22Sraf out: 608*41efec22Sraf queue_unlock(qp); 609*41efec22Sraf 610*41efec22Sraf if (!try_flag) { 611*41efec22Sraf DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0); 612*41efec22Sraf } 6137c478bd9Sstevel@tonic-gate 6147c478bd9Sstevel@tonic-gate return (error); 6157c478bd9Sstevel@tonic-gate } 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate int 6187c478bd9Sstevel@tonic-gate rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp) 6197c478bd9Sstevel@tonic-gate { 6207c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 6217c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 6227c478bd9Sstevel@tonic-gate readlock_t *readlockp; 6237c478bd9Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 6247c478bd9Sstevel@tonic-gate int error; 6257c478bd9Sstevel@tonic-gate 6267c478bd9Sstevel@tonic-gate /* 6277c478bd9Sstevel@tonic-gate * If we already hold a readers lock on this rwlock, 6287c478bd9Sstevel@tonic-gate * just increment our reference count and return. 6297c478bd9Sstevel@tonic-gate */ 630*41efec22Sraf sigoff(self); 6317c478bd9Sstevel@tonic-gate readlockp = rwl_entry(rwlp); 6327c478bd9Sstevel@tonic-gate if (readlockp->rd_count != 0) { 633*41efec22Sraf if (readlockp->rd_count == READ_LOCK_MAX) { 634*41efec22Sraf sigon(self); 635*41efec22Sraf error = EAGAIN; 636*41efec22Sraf goto out; 6377c478bd9Sstevel@tonic-gate } 638*41efec22Sraf sigon(self); 639*41efec22Sraf error = 0; 640*41efec22Sraf goto out; 641*41efec22Sraf } 642*41efec22Sraf sigon(self); 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate /* 6457c478bd9Sstevel@tonic-gate * If we hold the writer lock, bail out. 6467c478bd9Sstevel@tonic-gate */ 6477c478bd9Sstevel@tonic-gate if (rw_write_is_held(rwlp)) { 6487c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 6497c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_rdlock", 6507c478bd9Sstevel@tonic-gate "calling thread owns the writer lock"); 651*41efec22Sraf error = EDEADLK; 652*41efec22Sraf goto out; 6537c478bd9Sstevel@tonic-gate } 6547c478bd9Sstevel@tonic-gate 655*41efec22Sraf if (read_lock_try(rwlp, 0)) 656*41efec22Sraf error = 0; 657*41efec22Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 6587c478bd9Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, tsp, READ_LOCK); 6597c478bd9Sstevel@tonic-gate else /* user-level */ 6607c478bd9Sstevel@tonic-gate error = rwlock_lock(rwlp, tsp, READ_LOCK); 6617c478bd9Sstevel@tonic-gate 662*41efec22Sraf out: 6637c478bd9Sstevel@tonic-gate if (error == 0) { 664*41efec22Sraf sigoff(self); 665*41efec22Sraf rwl_entry(rwlp)->rd_count++; 666*41efec22Sraf sigon(self); 6677c478bd9Sstevel@tonic-gate if (rwsp) 6687c478bd9Sstevel@tonic-gate tdb_incr(rwsp->rw_rdlock); 669*41efec22Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK); 670*41efec22Sraf } else { 671*41efec22Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error); 6727c478bd9Sstevel@tonic-gate } 6737c478bd9Sstevel@tonic-gate 6747c478bd9Sstevel@tonic-gate return (error); 6757c478bd9Sstevel@tonic-gate } 6767c478bd9Sstevel@tonic-gate 6777c478bd9Sstevel@tonic-gate #pragma weak rw_rdlock = __rw_rdlock 6787c478bd9Sstevel@tonic-gate #pragma weak _rw_rdlock = __rw_rdlock 6797c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_rdlock = __rw_rdlock 6807c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_rdlock = __rw_rdlock 6817c478bd9Sstevel@tonic-gate int 6827c478bd9Sstevel@tonic-gate __rw_rdlock(rwlock_t *rwlp) 6837c478bd9Sstevel@tonic-gate { 6847c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 6857c478bd9Sstevel@tonic-gate return (rw_rdlock_impl(rwlp, NULL)); 6867c478bd9Sstevel@tonic-gate } 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate void 6897c478bd9Sstevel@tonic-gate lrw_rdlock(rwlock_t *rwlp) 6907c478bd9Sstevel@tonic-gate { 6917c478bd9Sstevel@tonic-gate enter_critical(curthread); 6927c478bd9Sstevel@tonic-gate (void) rw_rdlock_impl(rwlp, NULL); 6937c478bd9Sstevel@tonic-gate } 6947c478bd9Sstevel@tonic-gate 6957c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_reltimedrdlock_np = \ 6967c478bd9Sstevel@tonic-gate _pthread_rwlock_reltimedrdlock_np 6977c478bd9Sstevel@tonic-gate int 6987c478bd9Sstevel@tonic-gate _pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime) 6997c478bd9Sstevel@tonic-gate { 7007c478bd9Sstevel@tonic-gate timespec_t tslocal = *reltime; 7017c478bd9Sstevel@tonic-gate int error; 7027c478bd9Sstevel@tonic-gate 7037c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 7047c478bd9Sstevel@tonic-gate error = rw_rdlock_impl(rwlp, &tslocal); 7057c478bd9Sstevel@tonic-gate if (error == ETIME) 7067c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 7077c478bd9Sstevel@tonic-gate return (error); 7087c478bd9Sstevel@tonic-gate } 7097c478bd9Sstevel@tonic-gate 7107c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock 7117c478bd9Sstevel@tonic-gate int 7127c478bd9Sstevel@tonic-gate _pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime) 7137c478bd9Sstevel@tonic-gate { 7147c478bd9Sstevel@tonic-gate timespec_t tslocal; 7157c478bd9Sstevel@tonic-gate int error; 7167c478bd9Sstevel@tonic-gate 7177c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 7187c478bd9Sstevel@tonic-gate abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 7197c478bd9Sstevel@tonic-gate error = rw_rdlock_impl(rwlp, &tslocal); 7207c478bd9Sstevel@tonic-gate if (error == ETIME) 7217c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 7227c478bd9Sstevel@tonic-gate return (error); 7237c478bd9Sstevel@tonic-gate } 7247c478bd9Sstevel@tonic-gate 7257c478bd9Sstevel@tonic-gate int 7267c478bd9Sstevel@tonic-gate rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp) 7277c478bd9Sstevel@tonic-gate { 7287c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 7297c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 7307c478bd9Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 7317c478bd9Sstevel@tonic-gate int error; 7327c478bd9Sstevel@tonic-gate 7337c478bd9Sstevel@tonic-gate /* 7347c478bd9Sstevel@tonic-gate * If we hold a readers lock on this rwlock, bail out. 7357c478bd9Sstevel@tonic-gate */ 7367c478bd9Sstevel@tonic-gate if (rw_read_is_held(rwlp)) { 7377c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 7387c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_wrlock", 7397c478bd9Sstevel@tonic-gate "calling thread owns the readers lock"); 740*41efec22Sraf error = EDEADLK; 741*41efec22Sraf goto out; 7427c478bd9Sstevel@tonic-gate } 7437c478bd9Sstevel@tonic-gate 7447c478bd9Sstevel@tonic-gate /* 7457c478bd9Sstevel@tonic-gate * If we hold the writer lock, bail out. 7467c478bd9Sstevel@tonic-gate */ 7477c478bd9Sstevel@tonic-gate if (rw_write_is_held(rwlp)) { 7487c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 7497c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_wrlock", 7507c478bd9Sstevel@tonic-gate "calling thread owns the writer lock"); 751*41efec22Sraf error = EDEADLK; 752*41efec22Sraf goto out; 7537c478bd9Sstevel@tonic-gate } 7547c478bd9Sstevel@tonic-gate 755*41efec22Sraf if (write_lock_try(rwlp, 0)) 756*41efec22Sraf error = 0; 757*41efec22Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 7587c478bd9Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK); 759*41efec22Sraf else /* user-level */ 7607c478bd9Sstevel@tonic-gate error = rwlock_lock(rwlp, tsp, WRITE_LOCK); 7617c478bd9Sstevel@tonic-gate 762*41efec22Sraf out: 763*41efec22Sraf if (error == 0) { 764*41efec22Sraf rwlp->rwlock_owner = (uintptr_t)self; 765*41efec22Sraf if (rwlp->rwlock_type == USYNC_PROCESS) 766*41efec22Sraf rwlp->rwlock_ownerpid = udp->pid; 767*41efec22Sraf if (rwsp) { 7687c478bd9Sstevel@tonic-gate tdb_incr(rwsp->rw_wrlock); 7697c478bd9Sstevel@tonic-gate rwsp->rw_wrlock_begin_hold = gethrtime(); 7707c478bd9Sstevel@tonic-gate } 771*41efec22Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK); 772*41efec22Sraf } else { 773*41efec22Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error); 774*41efec22Sraf } 7757c478bd9Sstevel@tonic-gate return (error); 7767c478bd9Sstevel@tonic-gate } 7777c478bd9Sstevel@tonic-gate 7787c478bd9Sstevel@tonic-gate #pragma weak rw_wrlock = __rw_wrlock 7797c478bd9Sstevel@tonic-gate #pragma weak _rw_wrlock = __rw_wrlock 7807c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_wrlock = __rw_wrlock 7817c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_wrlock = __rw_wrlock 7827c478bd9Sstevel@tonic-gate int 7837c478bd9Sstevel@tonic-gate __rw_wrlock(rwlock_t *rwlp) 7847c478bd9Sstevel@tonic-gate { 7857c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 7867c478bd9Sstevel@tonic-gate return (rw_wrlock_impl(rwlp, NULL)); 7877c478bd9Sstevel@tonic-gate } 7887c478bd9Sstevel@tonic-gate 7897c478bd9Sstevel@tonic-gate void 7907c478bd9Sstevel@tonic-gate lrw_wrlock(rwlock_t *rwlp) 7917c478bd9Sstevel@tonic-gate { 7927c478bd9Sstevel@tonic-gate enter_critical(curthread); 7937c478bd9Sstevel@tonic-gate (void) rw_wrlock_impl(rwlp, NULL); 7947c478bd9Sstevel@tonic-gate } 7957c478bd9Sstevel@tonic-gate 7967c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_reltimedwrlock_np = \ 7977c478bd9Sstevel@tonic-gate _pthread_rwlock_reltimedwrlock_np 7987c478bd9Sstevel@tonic-gate int 7997c478bd9Sstevel@tonic-gate _pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime) 8007c478bd9Sstevel@tonic-gate { 8017c478bd9Sstevel@tonic-gate timespec_t tslocal = *reltime; 8027c478bd9Sstevel@tonic-gate int error; 8037c478bd9Sstevel@tonic-gate 8047c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 8057c478bd9Sstevel@tonic-gate error = rw_wrlock_impl(rwlp, &tslocal); 8067c478bd9Sstevel@tonic-gate if (error == ETIME) 8077c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 8087c478bd9Sstevel@tonic-gate return (error); 8097c478bd9Sstevel@tonic-gate } 8107c478bd9Sstevel@tonic-gate 8117c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock 8127c478bd9Sstevel@tonic-gate int 8137c478bd9Sstevel@tonic-gate _pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime) 8147c478bd9Sstevel@tonic-gate { 8157c478bd9Sstevel@tonic-gate timespec_t tslocal; 8167c478bd9Sstevel@tonic-gate int error; 8177c478bd9Sstevel@tonic-gate 8187c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 8197c478bd9Sstevel@tonic-gate abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 8207c478bd9Sstevel@tonic-gate error = rw_wrlock_impl(rwlp, &tslocal); 8217c478bd9Sstevel@tonic-gate if (error == ETIME) 8227c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 8237c478bd9Sstevel@tonic-gate return (error); 8247c478bd9Sstevel@tonic-gate } 8257c478bd9Sstevel@tonic-gate 8267c478bd9Sstevel@tonic-gate #pragma weak rw_tryrdlock = __rw_tryrdlock 8277c478bd9Sstevel@tonic-gate #pragma weak _rw_tryrdlock = __rw_tryrdlock 8287c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_tryrdlock = __rw_tryrdlock 8297c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_tryrdlock = __rw_tryrdlock 8307c478bd9Sstevel@tonic-gate int 8317c478bd9Sstevel@tonic-gate __rw_tryrdlock(rwlock_t *rwlp) 8327c478bd9Sstevel@tonic-gate { 8337c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 8347c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 8357c478bd9Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 8367c478bd9Sstevel@tonic-gate readlock_t *readlockp; 8377c478bd9Sstevel@tonic-gate int error; 8387c478bd9Sstevel@tonic-gate 8397c478bd9Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 8407c478bd9Sstevel@tonic-gate 8417c478bd9Sstevel@tonic-gate if (rwsp) 8427c478bd9Sstevel@tonic-gate tdb_incr(rwsp->rw_rdlock_try); 8437c478bd9Sstevel@tonic-gate 8447c478bd9Sstevel@tonic-gate /* 8457c478bd9Sstevel@tonic-gate * If we already hold a readers lock on this rwlock, 8467c478bd9Sstevel@tonic-gate * just increment our reference count and return. 8477c478bd9Sstevel@tonic-gate */ 848*41efec22Sraf sigoff(self); 8497c478bd9Sstevel@tonic-gate readlockp = rwl_entry(rwlp); 8507c478bd9Sstevel@tonic-gate if (readlockp->rd_count != 0) { 851*41efec22Sraf if (readlockp->rd_count == READ_LOCK_MAX) { 852*41efec22Sraf sigon(self); 853*41efec22Sraf error = EAGAIN; 854*41efec22Sraf goto out; 8557c478bd9Sstevel@tonic-gate } 856*41efec22Sraf sigon(self); 857*41efec22Sraf error = 0; 858*41efec22Sraf goto out; 859*41efec22Sraf } 860*41efec22Sraf sigon(self); 8617c478bd9Sstevel@tonic-gate 862*41efec22Sraf if (read_lock_try(rwlp, 0)) 863*41efec22Sraf error = 0; 864*41efec22Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 8657c478bd9Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 8667c478bd9Sstevel@tonic-gate else /* user-level */ 8677c478bd9Sstevel@tonic-gate error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 8687c478bd9Sstevel@tonic-gate 869*41efec22Sraf out: 870*41efec22Sraf if (error == 0) { 871*41efec22Sraf sigoff(self); 872*41efec22Sraf rwl_entry(rwlp)->rd_count++; 873*41efec22Sraf sigon(self); 874*41efec22Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK); 875*41efec22Sraf } else { 876*41efec22Sraf if (rwsp) 8777c478bd9Sstevel@tonic-gate tdb_incr(rwsp->rw_rdlock_try_fail); 878*41efec22Sraf if (error != EBUSY) { 879*41efec22Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, 880*41efec22Sraf error); 881*41efec22Sraf } 882*41efec22Sraf } 8837c478bd9Sstevel@tonic-gate 8847c478bd9Sstevel@tonic-gate return (error); 8857c478bd9Sstevel@tonic-gate } 8867c478bd9Sstevel@tonic-gate 8877c478bd9Sstevel@tonic-gate #pragma weak rw_trywrlock = __rw_trywrlock 8887c478bd9Sstevel@tonic-gate #pragma weak _rw_trywrlock = __rw_trywrlock 8897c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_trywrlock = __rw_trywrlock 8907c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_trywrlock = __rw_trywrlock 8917c478bd9Sstevel@tonic-gate int 8927c478bd9Sstevel@tonic-gate __rw_trywrlock(rwlock_t *rwlp) 8937c478bd9Sstevel@tonic-gate { 8947c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 8957c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 8967c478bd9Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 8977c478bd9Sstevel@tonic-gate int error; 8987c478bd9Sstevel@tonic-gate 899*41efec22Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 9007c478bd9Sstevel@tonic-gate 9017c478bd9Sstevel@tonic-gate if (rwsp) 9027c478bd9Sstevel@tonic-gate tdb_incr(rwsp->rw_wrlock_try); 9037c478bd9Sstevel@tonic-gate 904*41efec22Sraf if (write_lock_try(rwlp, 0)) 905*41efec22Sraf error = 0; 906*41efec22Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 9077c478bd9Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 908*41efec22Sraf else /* user-level */ 9097c478bd9Sstevel@tonic-gate error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 910*41efec22Sraf 911*41efec22Sraf if (error == 0) { 912*41efec22Sraf rwlp->rwlock_owner = (uintptr_t)self; 913*41efec22Sraf if (rwlp->rwlock_type == USYNC_PROCESS) 914*41efec22Sraf rwlp->rwlock_ownerpid = udp->pid; 915*41efec22Sraf if (rwsp) 9167c478bd9Sstevel@tonic-gate rwsp->rw_wrlock_begin_hold = gethrtime(); 917*41efec22Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK); 918*41efec22Sraf } else { 919*41efec22Sraf if (rwsp) 920*41efec22Sraf tdb_incr(rwsp->rw_wrlock_try_fail); 921*41efec22Sraf if (error != EBUSY) { 922*41efec22Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, 923*41efec22Sraf error); 924*41efec22Sraf } 9257c478bd9Sstevel@tonic-gate } 9267c478bd9Sstevel@tonic-gate return (error); 9277c478bd9Sstevel@tonic-gate } 9287c478bd9Sstevel@tonic-gate 9297c478bd9Sstevel@tonic-gate #pragma weak rw_unlock = __rw_unlock 9307c478bd9Sstevel@tonic-gate #pragma weak _rw_unlock = __rw_unlock 9317c478bd9Sstevel@tonic-gate #pragma weak pthread_rwlock_unlock = __rw_unlock 9327c478bd9Sstevel@tonic-gate #pragma weak _pthread_rwlock_unlock = __rw_unlock 9337c478bd9Sstevel@tonic-gate int 9347c478bd9Sstevel@tonic-gate __rw_unlock(rwlock_t *rwlp) 9357c478bd9Sstevel@tonic-gate { 936*41efec22Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 937*41efec22Sraf uint32_t readers; 9387c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 9397c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 9407c478bd9Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp; 941*41efec22Sraf queue_head_t *qp; 942*41efec22Sraf int rd_wr; 943*41efec22Sraf int waked = 0; 9447c478bd9Sstevel@tonic-gate 945*41efec22Sraf readers = *rwstate; 946*41efec22Sraf ASSERT_CONSISTENT_STATE(readers); 947*41efec22Sraf if (readers & URW_WRITE_LOCKED) { 948*41efec22Sraf rd_wr = WRITE_LOCK; 949*41efec22Sraf readers = 0; 950*41efec22Sraf } else { 951*41efec22Sraf rd_wr = READ_LOCK; 952*41efec22Sraf readers &= URW_READERS_MASK; 9537c478bd9Sstevel@tonic-gate } 9547c478bd9Sstevel@tonic-gate 955*41efec22Sraf if (rd_wr == WRITE_LOCK) { 9567c478bd9Sstevel@tonic-gate /* 9577c478bd9Sstevel@tonic-gate * Since the writer lock is held, we'd better be 9587c478bd9Sstevel@tonic-gate * holding it, else we cannot legitimately be here. 9597c478bd9Sstevel@tonic-gate */ 9607c478bd9Sstevel@tonic-gate if (!rw_write_is_held(rwlp)) { 9617c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 9627c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", 9637c478bd9Sstevel@tonic-gate "writer lock held, " 9647c478bd9Sstevel@tonic-gate "but not by the calling thread"); 9657c478bd9Sstevel@tonic-gate return (EPERM); 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) { 9687c478bd9Sstevel@tonic-gate if (rwsp->rw_wrlock_begin_hold) 9697c478bd9Sstevel@tonic-gate rwsp->rw_wrlock_hold_time += 9707c478bd9Sstevel@tonic-gate gethrtime() - rwsp->rw_wrlock_begin_hold; 9717c478bd9Sstevel@tonic-gate rwsp->rw_wrlock_begin_hold = 0; 9727c478bd9Sstevel@tonic-gate } 973*41efec22Sraf rwlp->rwlock_owner = 0; 974*41efec22Sraf rwlp->rwlock_ownerpid = 0; 975*41efec22Sraf } else if (readers > 0) { 9767c478bd9Sstevel@tonic-gate /* 9777c478bd9Sstevel@tonic-gate * A readers lock is held; if we don't hold one, bail out. 9787c478bd9Sstevel@tonic-gate */ 979*41efec22Sraf readlock_t *readlockp; 980*41efec22Sraf 981*41efec22Sraf sigoff(self); 982*41efec22Sraf readlockp = rwl_entry(rwlp); 9837c478bd9Sstevel@tonic-gate if (readlockp->rd_count == 0) { 984*41efec22Sraf sigon(self); 9857c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 9867c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", 9877c478bd9Sstevel@tonic-gate "readers lock held, " 9887c478bd9Sstevel@tonic-gate "but not by the calling thread"); 9897c478bd9Sstevel@tonic-gate return (EPERM); 9907c478bd9Sstevel@tonic-gate } 9917c478bd9Sstevel@tonic-gate /* 9927c478bd9Sstevel@tonic-gate * If we hold more than one readers lock on this rwlock, 9937c478bd9Sstevel@tonic-gate * just decrement our reference count and return. 9947c478bd9Sstevel@tonic-gate */ 9957c478bd9Sstevel@tonic-gate if (--readlockp->rd_count != 0) { 996*41efec22Sraf sigon(self); 997*41efec22Sraf goto out; 9987c478bd9Sstevel@tonic-gate } 999*41efec22Sraf sigon(self); 10007c478bd9Sstevel@tonic-gate } else { 10017c478bd9Sstevel@tonic-gate /* 10027c478bd9Sstevel@tonic-gate * This is a usage error. 10037c478bd9Sstevel@tonic-gate * No thread should release an unowned lock. 10047c478bd9Sstevel@tonic-gate */ 10057c478bd9Sstevel@tonic-gate if (self->ul_error_detection) 10067c478bd9Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", "lock not owned"); 10077c478bd9Sstevel@tonic-gate return (EPERM); 10087c478bd9Sstevel@tonic-gate } 10097c478bd9Sstevel@tonic-gate 1010*41efec22Sraf if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) { 1011*41efec22Sraf /* EMPTY */; 1012*41efec22Sraf } else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) { 1013*41efec22Sraf /* EMPTY */; 1014*41efec22Sraf } else if (rwlp->rwlock_type == USYNC_PROCESS) { 1015*41efec22Sraf (void) _private_mutex_lock(&rwlp->mutex); 1016*41efec22Sraf (void) __lwp_rwlock_unlock(rwlp); 1017*41efec22Sraf (void) _private_mutex_unlock(&rwlp->mutex); 1018*41efec22Sraf waked = 1; 10197c478bd9Sstevel@tonic-gate } else { 10207c478bd9Sstevel@tonic-gate qp = queue_lock(rwlp, MX); 1021*41efec22Sraf if (rd_wr == READ_LOCK) 1022*41efec22Sraf atomic_dec_32(rwstate); 1023*41efec22Sraf else 1024*41efec22Sraf atomic_and_32(rwstate, ~URW_WRITE_LOCKED); 10257c478bd9Sstevel@tonic-gate waked = rw_queue_release(qp, rwlp); 10267c478bd9Sstevel@tonic-gate } 10277c478bd9Sstevel@tonic-gate 1028*41efec22Sraf out: 1029*41efec22Sraf DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr); 1030*41efec22Sraf 10317c478bd9Sstevel@tonic-gate /* 10327c478bd9Sstevel@tonic-gate * Yield to the thread we just waked up, just in case we might 10337c478bd9Sstevel@tonic-gate * be about to grab the rwlock again immediately upon return. 10347c478bd9Sstevel@tonic-gate * This is pretty weak but it helps on a uniprocessor and also 10357c478bd9Sstevel@tonic-gate * when cpu affinity has assigned both ourself and the other 10367c478bd9Sstevel@tonic-gate * thread to the same CPU. Note that lwp_yield() will yield 10377c478bd9Sstevel@tonic-gate * the processor only if the writer is at the same or higher 10387c478bd9Sstevel@tonic-gate * priority than ourself. This provides more balanced program 10397c478bd9Sstevel@tonic-gate * behavior; it doesn't guarantee acquisition of the lock by 10407c478bd9Sstevel@tonic-gate * the pending writer. 10417c478bd9Sstevel@tonic-gate */ 10427c478bd9Sstevel@tonic-gate if (waked) 10437c478bd9Sstevel@tonic-gate lwp_yield(); 10447c478bd9Sstevel@tonic-gate return (0); 10457c478bd9Sstevel@tonic-gate } 10467c478bd9Sstevel@tonic-gate 10477c478bd9Sstevel@tonic-gate void 10487c478bd9Sstevel@tonic-gate lrw_unlock(rwlock_t *rwlp) 10497c478bd9Sstevel@tonic-gate { 10507c478bd9Sstevel@tonic-gate (void) __rw_unlock(rwlp); 10517c478bd9Sstevel@tonic-gate exit_critical(curthread); 10527c478bd9Sstevel@tonic-gate } 1053