xref: /illumos-gate/usr/src/lib/libc/port/threads/rwlock.c (revision 7c5714f667c21540147234b280036c21ff93bc69)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2141efec22Sraf 
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate #include "lint.h"
307c478bd9Sstevel@tonic-gate #include "thr_uberdata.h"
317c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
327c478bd9Sstevel@tonic-gate 
/*
 * Lock-request codes passed as the rd_wr argument of the common
 * locking routines below.  The base code selects a reader or a writer
 * request; TRY_FLAG is or'ed in for the non-blocking (try) variants
 * and is stripped off again by the routines that receive it.
 */
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(TRY_FLAG | READ_LOCK)
#define	WRITE_LOCK_TRY		(TRY_FLAG | WRITE_LOCK)

/* initial number of readlock_t structs allocated */
#define	NLOCKS	4
407c478bd9Sstevel@tonic-gate 
/*
 * Sanity check on a snapshot of the rwlock state word: whenever the
 * URW_WRITE_LOCKED bit is set, the only other bit that may be set
 * along with it is URW_HAS_WAITERS.
 */
#define	ASSERT_CONSISTENT_STATE(readers)				\
	ASSERT(((readers) & URW_WRITE_LOCKED) == 0 ||			\
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
4441efec22Sraf 
457c478bd9Sstevel@tonic-gate /*
467c478bd9Sstevel@tonic-gate  * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
4741efec22Sraf  * We must be deferring signals for this to be safe.
48883492d5Sraf  * Else if we are returning an entry with ul_rdlockcnt == 0,
4941efec22Sraf  * it could be reassigned behind our back in a signal handler.
507c478bd9Sstevel@tonic-gate  */
517c478bd9Sstevel@tonic-gate static readlock_t *
527c478bd9Sstevel@tonic-gate rwl_entry(rwlock_t *rwlp)
537c478bd9Sstevel@tonic-gate {
547c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
557c478bd9Sstevel@tonic-gate 	readlock_t *remembered = NULL;
567c478bd9Sstevel@tonic-gate 	readlock_t *readlockp;
577c478bd9Sstevel@tonic-gate 	uint_t nlocks;
587c478bd9Sstevel@tonic-gate 
5941efec22Sraf 	/* we must be deferring signals */
6041efec22Sraf 	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);
6141efec22Sraf 
62883492d5Sraf 	if ((nlocks = self->ul_rdlockcnt) != 0)
637c478bd9Sstevel@tonic-gate 		readlockp = self->ul_readlock.array;
647c478bd9Sstevel@tonic-gate 	else {
657c478bd9Sstevel@tonic-gate 		nlocks = 1;
667c478bd9Sstevel@tonic-gate 		readlockp = &self->ul_readlock.single;
677c478bd9Sstevel@tonic-gate 	}
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate 	for (; nlocks; nlocks--, readlockp++) {
707c478bd9Sstevel@tonic-gate 		if (readlockp->rd_rwlock == rwlp)
717c478bd9Sstevel@tonic-gate 			return (readlockp);
727c478bd9Sstevel@tonic-gate 		if (readlockp->rd_count == 0 && remembered == NULL)
737c478bd9Sstevel@tonic-gate 			remembered = readlockp;
747c478bd9Sstevel@tonic-gate 	}
757c478bd9Sstevel@tonic-gate 	if (remembered != NULL) {
767c478bd9Sstevel@tonic-gate 		remembered->rd_rwlock = rwlp;
777c478bd9Sstevel@tonic-gate 		return (remembered);
787c478bd9Sstevel@tonic-gate 	}
797c478bd9Sstevel@tonic-gate 
807c478bd9Sstevel@tonic-gate 	/*
817c478bd9Sstevel@tonic-gate 	 * No entry available.  Allocate more space, converting the single
827c478bd9Sstevel@tonic-gate 	 * readlock_t entry into an array of readlock_t entries if necessary.
837c478bd9Sstevel@tonic-gate 	 */
84883492d5Sraf 	if ((nlocks = self->ul_rdlockcnt) == 0) {
857c478bd9Sstevel@tonic-gate 		/*
867c478bd9Sstevel@tonic-gate 		 * Initial allocation of the readlock_t array.
877c478bd9Sstevel@tonic-gate 		 * Convert the single entry into an array.
887c478bd9Sstevel@tonic-gate 		 */
89883492d5Sraf 		self->ul_rdlockcnt = nlocks = NLOCKS;
907c478bd9Sstevel@tonic-gate 		readlockp = lmalloc(nlocks * sizeof (readlock_t));
917c478bd9Sstevel@tonic-gate 		/*
927c478bd9Sstevel@tonic-gate 		 * The single readlock_t becomes the first entry in the array.
937c478bd9Sstevel@tonic-gate 		 */
947c478bd9Sstevel@tonic-gate 		*readlockp = self->ul_readlock.single;
957c478bd9Sstevel@tonic-gate 		self->ul_readlock.single.rd_count = 0;
967c478bd9Sstevel@tonic-gate 		self->ul_readlock.array = readlockp;
977c478bd9Sstevel@tonic-gate 		/*
987c478bd9Sstevel@tonic-gate 		 * Return the next available entry in the array.
997c478bd9Sstevel@tonic-gate 		 */
1007c478bd9Sstevel@tonic-gate 		(++readlockp)->rd_rwlock = rwlp;
1017c478bd9Sstevel@tonic-gate 		return (readlockp);
1027c478bd9Sstevel@tonic-gate 	}
1037c478bd9Sstevel@tonic-gate 	/*
1047c478bd9Sstevel@tonic-gate 	 * Reallocate the array, double the size each time.
1057c478bd9Sstevel@tonic-gate 	 */
1067c478bd9Sstevel@tonic-gate 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
1078cd45542Sraf 	(void) memcpy(readlockp, self->ul_readlock.array,
1087c478bd9Sstevel@tonic-gate 	    nlocks * sizeof (readlock_t));
1097c478bd9Sstevel@tonic-gate 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
1107c478bd9Sstevel@tonic-gate 	self->ul_readlock.array = readlockp;
111883492d5Sraf 	self->ul_rdlockcnt *= 2;
1127c478bd9Sstevel@tonic-gate 	/*
1137c478bd9Sstevel@tonic-gate 	 * Return the next available entry in the newly allocated array.
1147c478bd9Sstevel@tonic-gate 	 */
1157c478bd9Sstevel@tonic-gate 	(readlockp += nlocks)->rd_rwlock = rwlp;
1167c478bd9Sstevel@tonic-gate 	return (readlockp);
1177c478bd9Sstevel@tonic-gate }
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate /*
1207c478bd9Sstevel@tonic-gate  * Free the array of rwlocks held for reading.
1217c478bd9Sstevel@tonic-gate  */
1227c478bd9Sstevel@tonic-gate void
1237c478bd9Sstevel@tonic-gate rwl_free(ulwp_t *ulwp)
1247c478bd9Sstevel@tonic-gate {
1257c478bd9Sstevel@tonic-gate 	uint_t nlocks;
1267c478bd9Sstevel@tonic-gate 
127883492d5Sraf 	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
1287c478bd9Sstevel@tonic-gate 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
129883492d5Sraf 	ulwp->ul_rdlockcnt = 0;
1307c478bd9Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_rwlock = NULL;
1317c478bd9Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_count = 0;
1327c478bd9Sstevel@tonic-gate }
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate /*
1357c478bd9Sstevel@tonic-gate  * Check if a reader version of the lock is held by the current thread.
1367c478bd9Sstevel@tonic-gate  */
1377257d1b4Sraf #pragma weak _rw_read_held = rw_read_held
1387c478bd9Sstevel@tonic-gate int
1397257d1b4Sraf rw_read_held(rwlock_t *rwlp)
1407c478bd9Sstevel@tonic-gate {
14141efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
14241efec22Sraf 	uint32_t readers;
14341efec22Sraf 	ulwp_t *self = curthread;
1447c478bd9Sstevel@tonic-gate 	readlock_t *readlockp;
1457c478bd9Sstevel@tonic-gate 	uint_t nlocks;
14641efec22Sraf 	int rval = 0;
1477c478bd9Sstevel@tonic-gate 
14841efec22Sraf 	no_preempt(self);
1497c478bd9Sstevel@tonic-gate 
15041efec22Sraf 	readers = *rwstate;
15141efec22Sraf 	ASSERT_CONSISTENT_STATE(readers);
15241efec22Sraf 	if (!(readers & URW_WRITE_LOCKED) &&
15341efec22Sraf 	    (readers & URW_READERS_MASK) != 0) {
1547c478bd9Sstevel@tonic-gate 		/*
1557c478bd9Sstevel@tonic-gate 		 * The lock is held for reading by some thread.
1567c478bd9Sstevel@tonic-gate 		 * Search our array of rwlocks held for reading for a match.
1577c478bd9Sstevel@tonic-gate 		 */
158883492d5Sraf 		if ((nlocks = self->ul_rdlockcnt) != 0)
1597c478bd9Sstevel@tonic-gate 			readlockp = self->ul_readlock.array;
1607c478bd9Sstevel@tonic-gate 		else {
1617c478bd9Sstevel@tonic-gate 			nlocks = 1;
1627c478bd9Sstevel@tonic-gate 			readlockp = &self->ul_readlock.single;
1637c478bd9Sstevel@tonic-gate 		}
16441efec22Sraf 		for (; nlocks; nlocks--, readlockp++) {
16541efec22Sraf 			if (readlockp->rd_rwlock == rwlp) {
16641efec22Sraf 				if (readlockp->rd_count)
16741efec22Sraf 					rval = 1;
16841efec22Sraf 				break;
16941efec22Sraf 			}
17041efec22Sraf 		}
17141efec22Sraf 	}
1727c478bd9Sstevel@tonic-gate 
17341efec22Sraf 	preempt(self);
17441efec22Sraf 	return (rval);
1757c478bd9Sstevel@tonic-gate }
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate /*
1787c478bd9Sstevel@tonic-gate  * Check if a writer version of the lock is held by the current thread.
1797c478bd9Sstevel@tonic-gate  */
1807257d1b4Sraf #pragma weak _rw_write_held = rw_write_held
1817c478bd9Sstevel@tonic-gate int
1827257d1b4Sraf rw_write_held(rwlock_t *rwlp)
1837c478bd9Sstevel@tonic-gate {
18441efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
18541efec22Sraf 	uint32_t readers;
1867c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
18741efec22Sraf 	int rval;
1887c478bd9Sstevel@tonic-gate 
18941efec22Sraf 	no_preempt(self);
1907c478bd9Sstevel@tonic-gate 
19141efec22Sraf 	readers = *rwstate;
19241efec22Sraf 	ASSERT_CONSISTENT_STATE(readers);
19341efec22Sraf 	rval = ((readers & URW_WRITE_LOCKED) &&
19441efec22Sraf 	    rwlp->rwlock_owner == (uintptr_t)self &&
19541efec22Sraf 	    (rwlp->rwlock_type == USYNC_THREAD ||
19641efec22Sraf 	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));
19741efec22Sraf 
19841efec22Sraf 	preempt(self);
19941efec22Sraf 	return (rval);
2007c478bd9Sstevel@tonic-gate }
2017c478bd9Sstevel@tonic-gate 
2027257d1b4Sraf #pragma weak _rwlock_init = rwlock_init
2037c478bd9Sstevel@tonic-gate /* ARGSUSED2 */
2047c478bd9Sstevel@tonic-gate int
2057257d1b4Sraf rwlock_init(rwlock_t *rwlp, int type, void *arg)
2067c478bd9Sstevel@tonic-gate {
207*7c5714f6Sraf 	ulwp_t *self = curthread;
208*7c5714f6Sraf 
2097c478bd9Sstevel@tonic-gate 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2107c478bd9Sstevel@tonic-gate 		return (EINVAL);
2117c478bd9Sstevel@tonic-gate 	/*
2127c478bd9Sstevel@tonic-gate 	 * Once reinitialized, we can no longer be holding a read or write lock.
2137c478bd9Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2147c478bd9Sstevel@tonic-gate 	 */
215*7c5714f6Sraf 	sigoff(self);
2167c478bd9Sstevel@tonic-gate 	rwl_entry(rwlp)->rd_count = 0;
217*7c5714f6Sraf 	sigon(self);
2188cd45542Sraf 	(void) memset(rwlp, 0, sizeof (*rwlp));
2197c478bd9Sstevel@tonic-gate 	rwlp->rwlock_type = (uint16_t)type;
2207c478bd9Sstevel@tonic-gate 	rwlp->rwlock_magic = RWL_MAGIC;
2217c478bd9Sstevel@tonic-gate 	rwlp->mutex.mutex_type = (uint8_t)type;
2227c478bd9Sstevel@tonic-gate 	rwlp->mutex.mutex_flag = LOCK_INITED;
2237c478bd9Sstevel@tonic-gate 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
224*7c5714f6Sraf 
225*7c5714f6Sraf 	/*
226*7c5714f6Sraf 	 * This should be at the beginning of the function,
227*7c5714f6Sraf 	 * but for the sake of old broken applications that
228*7c5714f6Sraf 	 * do not have proper alignment for their rwlocks
229*7c5714f6Sraf 	 * (and don't check the return code from rwlock_init),
230*7c5714f6Sraf 	 * we put it here, after initializing the rwlock regardless.
231*7c5714f6Sraf 	 */
232*7c5714f6Sraf 	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
233*7c5714f6Sraf 	    self->ul_misaligned == 0)
234*7c5714f6Sraf 		return (EINVAL);
235*7c5714f6Sraf 
2367c478bd9Sstevel@tonic-gate 	return (0);
2377c478bd9Sstevel@tonic-gate }
2387c478bd9Sstevel@tonic-gate 
2397257d1b4Sraf #pragma weak pthread_rwlock_destroy = rwlock_destroy
2407257d1b4Sraf #pragma weak _rwlock_destroy = rwlock_destroy
2417c478bd9Sstevel@tonic-gate int
2427257d1b4Sraf rwlock_destroy(rwlock_t *rwlp)
2437c478bd9Sstevel@tonic-gate {
2447c478bd9Sstevel@tonic-gate 	/*
2457c478bd9Sstevel@tonic-gate 	 * Once destroyed, we can no longer be holding a read or write lock.
2467c478bd9Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2477c478bd9Sstevel@tonic-gate 	 */
24841efec22Sraf 	sigoff(curthread);
2497c478bd9Sstevel@tonic-gate 	rwl_entry(rwlp)->rd_count = 0;
25041efec22Sraf 	sigon(curthread);
2517c478bd9Sstevel@tonic-gate 	rwlp->rwlock_magic = 0;
2527c478bd9Sstevel@tonic-gate 	tdb_sync_obj_deregister(rwlp);
2537c478bd9Sstevel@tonic-gate 	return (0);
2547c478bd9Sstevel@tonic-gate }
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate /*
25741efec22Sraf  * Attempt to acquire a readers lock.  Return true on success.
2587c478bd9Sstevel@tonic-gate  */
2597c478bd9Sstevel@tonic-gate static int
26041efec22Sraf read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
2617c478bd9Sstevel@tonic-gate {
26241efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
26341efec22Sraf 	uint32_t mask = ignore_waiters_flag?
26441efec22Sraf 	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
26541efec22Sraf 	uint32_t readers;
2667c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate 	no_preempt(self);
26941efec22Sraf 	while (((readers = *rwstate) & mask) == 0) {
27041efec22Sraf 		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
2717c478bd9Sstevel@tonic-gate 			preempt(self);
2727c478bd9Sstevel@tonic-gate 			return (1);
2737c478bd9Sstevel@tonic-gate 		}
2747c478bd9Sstevel@tonic-gate 	}
27541efec22Sraf 	preempt(self);
27641efec22Sraf 	return (0);
2777c478bd9Sstevel@tonic-gate }
27841efec22Sraf 
27941efec22Sraf /*
28041efec22Sraf  * Attempt to release a reader lock.  Return true on success.
28141efec22Sraf  */
28241efec22Sraf static int
28341efec22Sraf read_unlock_try(rwlock_t *rwlp)
28441efec22Sraf {
28541efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
28641efec22Sraf 	uint32_t readers;
28741efec22Sraf 	ulwp_t *self = curthread;
28841efec22Sraf 
28941efec22Sraf 	no_preempt(self);
29041efec22Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
29141efec22Sraf 		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
29241efec22Sraf 			preempt(self);
29341efec22Sraf 			return (1);
29441efec22Sraf 		}
29541efec22Sraf 	}
29641efec22Sraf 	preempt(self);
29741efec22Sraf 	return (0);
29841efec22Sraf }
29941efec22Sraf 
30041efec22Sraf /*
30141efec22Sraf  * Attempt to acquire a writer lock.  Return true on success.
30241efec22Sraf  */
30341efec22Sraf static int
30441efec22Sraf write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
30541efec22Sraf {
30641efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
30741efec22Sraf 	uint32_t mask = ignore_waiters_flag?
30841efec22Sraf 	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
30941efec22Sraf 	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
31041efec22Sraf 	ulwp_t *self = curthread;
31141efec22Sraf 	uint32_t readers;
31241efec22Sraf 
31341efec22Sraf 	no_preempt(self);
31441efec22Sraf 	while (((readers = *rwstate) & mask) == 0) {
31541efec22Sraf 		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
31641efec22Sraf 		    == readers) {
31741efec22Sraf 			preempt(self);
31841efec22Sraf 			return (1);
31941efec22Sraf 		}
32041efec22Sraf 	}
32141efec22Sraf 	preempt(self);
32241efec22Sraf 	return (0);
32341efec22Sraf }
32441efec22Sraf 
32541efec22Sraf /*
32641efec22Sraf  * Attempt to release a writer lock.  Return true on success.
32741efec22Sraf  */
32841efec22Sraf static int
32941efec22Sraf write_unlock_try(rwlock_t *rwlp)
33041efec22Sraf {
33141efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
33241efec22Sraf 	uint32_t readers;
33341efec22Sraf 	ulwp_t *self = curthread;
33441efec22Sraf 
33541efec22Sraf 	no_preempt(self);
33641efec22Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
33741efec22Sraf 		if (atomic_cas_32(rwstate, readers, 0) == readers) {
33841efec22Sraf 			preempt(self);
33941efec22Sraf 			return (1);
34041efec22Sraf 		}
34141efec22Sraf 	}
34241efec22Sraf 	preempt(self);
34341efec22Sraf 	return (0);
34441efec22Sraf }
34541efec22Sraf 
34641efec22Sraf /*
34741efec22Sraf  * Wake up thread(s) sleeping on the rwlock queue and then
34841efec22Sraf  * drop the queue lock.  Return non-zero if we wake up someone.
34941efec22Sraf  * This is called when a thread releases a lock that appears to have waiters.
35041efec22Sraf  */
35141efec22Sraf static int
35241efec22Sraf rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
35341efec22Sraf {
35441efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
35541efec22Sraf 	uint32_t readers;
35641efec22Sraf 	uint32_t writers;
35741efec22Sraf 	ulwp_t **ulwpp;
35841efec22Sraf 	ulwp_t *ulwp;
359d4204c85Sraf 	ulwp_t *prev;
360d4204c85Sraf 	int nlwpid = 0;
361d4204c85Sraf 	int more;
362d4204c85Sraf 	int maxlwps = MAXLWPS;
36341efec22Sraf 	lwpid_t buffer[MAXLWPS];
36441efec22Sraf 	lwpid_t *lwpid = buffer;
36541efec22Sraf 
36641efec22Sraf 	readers = *rwstate;
36741efec22Sraf 	ASSERT_CONSISTENT_STATE(readers);
36841efec22Sraf 	if (!(readers & URW_HAS_WAITERS)) {
3697c478bd9Sstevel@tonic-gate 		queue_unlock(qp);
3707c478bd9Sstevel@tonic-gate 		return (0);
3717c478bd9Sstevel@tonic-gate 	}
37241efec22Sraf 	readers &= URW_READERS_MASK;
37341efec22Sraf 	writers = 0;
37441efec22Sraf 
37541efec22Sraf 	/*
376d4204c85Sraf 	 * Examine the queue of waiters in priority order and prepare
377d4204c85Sraf 	 * to wake up as many readers as we encounter before encountering
378d4204c85Sraf 	 * a writer.  If the highest priority thread on the queue is a
37941efec22Sraf 	 * writer, stop there and wake it up.
38041efec22Sraf 	 *
38141efec22Sraf 	 * We keep track of lwpids that are to be unparked in lwpid[].
38241efec22Sraf 	 * __lwp_unpark_all() is called to unpark all of them after
38341efec22Sraf 	 * they have been removed from the sleep queue and the sleep
38441efec22Sraf 	 * queue lock has been dropped.  If we run out of space in our
38541efec22Sraf 	 * on-stack buffer, we need to allocate more but we can't call
38641efec22Sraf 	 * lmalloc() because we are holding a queue lock when the overflow
38741efec22Sraf 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
38841efec22Sraf 	 * either because the application may have allocated a small
38941efec22Sraf 	 * stack and we don't want to overrun the stack.  So we call
39041efec22Sraf 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
39141efec22Sraf 	 * system call directly since that path acquires no locks.
39241efec22Sraf 	 */
393d4204c85Sraf 	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
394d4204c85Sraf 		ulwp = *ulwpp;
395d4204c85Sraf 		ASSERT(ulwp->ul_wchan == rwlp);
39641efec22Sraf 		if (ulwp->ul_writer) {
39741efec22Sraf 			if (writers != 0 || readers != 0)
39841efec22Sraf 				break;
39941efec22Sraf 			/* one writer to wake */
40041efec22Sraf 			writers++;
40141efec22Sraf 		} else {
40241efec22Sraf 			if (writers != 0)
40341efec22Sraf 				break;
40441efec22Sraf 			/* at least one reader to wake */
40541efec22Sraf 			readers++;
40641efec22Sraf 			if (nlwpid == maxlwps)
40741efec22Sraf 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
40841efec22Sraf 		}
409d4204c85Sraf 		queue_unlink(qp, ulwpp, prev);
410d4204c85Sraf 		ulwp->ul_sleepq = NULL;
411d4204c85Sraf 		ulwp->ul_wchan = NULL;
41241efec22Sraf 		lwpid[nlwpid++] = ulwp->ul_lwpid;
41341efec22Sraf 	}
414d4204c85Sraf 	if (ulwpp == NULL)
41541efec22Sraf 		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
41641efec22Sraf 	if (nlwpid == 0) {
41741efec22Sraf 		queue_unlock(qp);
41841efec22Sraf 	} else {
419d4204c85Sraf 		ulwp_t *self = curthread;
42041efec22Sraf 		no_preempt(self);
42141efec22Sraf 		queue_unlock(qp);
42241efec22Sraf 		if (nlwpid == 1)
42341efec22Sraf 			(void) __lwp_unpark(lwpid[0]);
42441efec22Sraf 		else
42541efec22Sraf 			(void) __lwp_unpark_all(lwpid, nlwpid);
42641efec22Sraf 		preempt(self);
42741efec22Sraf 	}
42841efec22Sraf 	if (lwpid != buffer)
4298cd45542Sraf 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
43041efec22Sraf 	return (nlwpid != 0);
43141efec22Sraf }
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate /*
4347c478bd9Sstevel@tonic-gate  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
4357c478bd9Sstevel@tonic-gate  * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
4367c478bd9Sstevel@tonic-gate  *
4377c478bd9Sstevel@tonic-gate  * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
4387c478bd9Sstevel@tonic-gate  * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
4397c478bd9Sstevel@tonic-gate  * released, and if they need to sleep will release the mutex first. In the
4407c478bd9Sstevel@tonic-gate  * event of a spurious wakeup, these will return EAGAIN (because it is much
4417c478bd9Sstevel@tonic-gate  * easier for us to re-acquire the mutex here).
4427c478bd9Sstevel@tonic-gate  */
4437c478bd9Sstevel@tonic-gate int
4447c478bd9Sstevel@tonic-gate shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
4457c478bd9Sstevel@tonic-gate {
44641efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
44741efec22Sraf 	mutex_t *mp = &rwlp->mutex;
44841efec22Sraf 	uint32_t readers;
4497c478bd9Sstevel@tonic-gate 	int try_flag;
45041efec22Sraf 	int error;
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
4537c478bd9Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
4547c478bd9Sstevel@tonic-gate 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate 	if (!try_flag) {
4577c478bd9Sstevel@tonic-gate 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
4587c478bd9Sstevel@tonic-gate 	}
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate 	do {
46141efec22Sraf 		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
46241efec22Sraf 			error = EBUSY;
4637c478bd9Sstevel@tonic-gate 			break;
46441efec22Sraf 		}
4658cd45542Sraf 		if ((error = mutex_lock(mp)) != 0)
46641efec22Sraf 			break;
4677c478bd9Sstevel@tonic-gate 		if (rd_wr == READ_LOCK) {
46841efec22Sraf 			if (read_lock_try(rwlp, 0)) {
4698cd45542Sraf 				(void) mutex_unlock(mp);
47041efec22Sraf 				break;
4717c478bd9Sstevel@tonic-gate 			}
4727c478bd9Sstevel@tonic-gate 		} else {
47341efec22Sraf 			if (write_lock_try(rwlp, 0)) {
4748cd45542Sraf 				(void) mutex_unlock(mp);
47541efec22Sraf 				break;
4767c478bd9Sstevel@tonic-gate 			}
47741efec22Sraf 		}
47841efec22Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
47941efec22Sraf 		readers = *rwstate;
48041efec22Sraf 		ASSERT_CONSISTENT_STATE(readers);
4817c478bd9Sstevel@tonic-gate 		/*
48241efec22Sraf 		 * The calls to __lwp_rwlock_*() below will release the mutex,
48341efec22Sraf 		 * so we need a dtrace probe here.
4847c478bd9Sstevel@tonic-gate 		 */
48541efec22Sraf 		mp->mutex_owner = 0;
48641efec22Sraf 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
4877c478bd9Sstevel@tonic-gate 		/*
4887c478bd9Sstevel@tonic-gate 		 * The waiters bit may be inaccurate.
4897c478bd9Sstevel@tonic-gate 		 * Only the kernel knows for sure.
4907c478bd9Sstevel@tonic-gate 		 */
49141efec22Sraf 		if (rd_wr == READ_LOCK) {
49241efec22Sraf 			if (try_flag)
49341efec22Sraf 				error = __lwp_rwlock_tryrdlock(rwlp);
49441efec22Sraf 			else
49541efec22Sraf 				error = __lwp_rwlock_rdlock(rwlp, tsp);
4967c478bd9Sstevel@tonic-gate 		} else {
49741efec22Sraf 			if (try_flag)
49841efec22Sraf 				error = __lwp_rwlock_trywrlock(rwlp);
49941efec22Sraf 			else
5007c478bd9Sstevel@tonic-gate 				error = __lwp_rwlock_wrlock(rwlp, tsp);
5017c478bd9Sstevel@tonic-gate 		}
50241efec22Sraf 	} while (error == EAGAIN || error == EINTR);
5037c478bd9Sstevel@tonic-gate 
5047c478bd9Sstevel@tonic-gate 	if (!try_flag) {
50541efec22Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
5067c478bd9Sstevel@tonic-gate 	}
5077c478bd9Sstevel@tonic-gate 
5087c478bd9Sstevel@tonic-gate 	return (error);
5097c478bd9Sstevel@tonic-gate }
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate /*
5127c478bd9Sstevel@tonic-gate  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
5137c478bd9Sstevel@tonic-gate  * and trywrlock for process-private (USYNC_THREAD) rwlocks.
5147c478bd9Sstevel@tonic-gate  */
5157c478bd9Sstevel@tonic-gate int
5167c478bd9Sstevel@tonic-gate rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
5177c478bd9Sstevel@tonic-gate {
51841efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
51941efec22Sraf 	uint32_t readers;
5207c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
5217c478bd9Sstevel@tonic-gate 	queue_head_t *qp;
5227c478bd9Sstevel@tonic-gate 	ulwp_t *ulwp;
5237c478bd9Sstevel@tonic-gate 	int try_flag;
524d4204c85Sraf 	int ignore_waiters_flag;
5257c478bd9Sstevel@tonic-gate 	int error = 0;
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
5287c478bd9Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
5297c478bd9Sstevel@tonic-gate 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 	if (!try_flag) {
5327c478bd9Sstevel@tonic-gate 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
5337c478bd9Sstevel@tonic-gate 	}
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate 	qp = queue_lock(rwlp, MX);
536d4204c85Sraf 	/* initial attempt to acquire the lock fails if there are waiters */
537d4204c85Sraf 	ignore_waiters_flag = 0;
5387c478bd9Sstevel@tonic-gate 	while (error == 0) {
53941efec22Sraf 		if (rd_wr == READ_LOCK) {
540d4204c85Sraf 			if (read_lock_try(rwlp, ignore_waiters_flag))
541d4204c85Sraf 				break;
54241efec22Sraf 		} else {
543d4204c85Sraf 			if (write_lock_try(rwlp, ignore_waiters_flag))
544d4204c85Sraf 				break;
54541efec22Sraf 		}
546d4204c85Sraf 		/* subsequent attempts do not fail due to waiters */
547d4204c85Sraf 		ignore_waiters_flag = 1;
54841efec22Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
54941efec22Sraf 		readers = *rwstate;
55041efec22Sraf 		ASSERT_CONSISTENT_STATE(readers);
55141efec22Sraf 		if ((readers & URW_WRITE_LOCKED) ||
55241efec22Sraf 		    (rd_wr == WRITE_LOCK &&
55341efec22Sraf 		    (readers & URW_READERS_MASK) != 0))
5547c478bd9Sstevel@tonic-gate 			/* EMPTY */;	/* somebody holds the lock */
555d4204c85Sraf 		else if ((ulwp = queue_waiter(qp)) == NULL) {
55641efec22Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
557d4204c85Sraf 			continue;	/* no queued waiters, try again */
5587c478bd9Sstevel@tonic-gate 		} else {
559d4204c85Sraf 			/*
560d4204c85Sraf 			 * Do a priority check on the queued waiter (the
561d4204c85Sraf 			 * highest priority thread on the queue) to see
562d4204c85Sraf 			 * if we should defer to him or just grab the lock.
563d4204c85Sraf 			 */
5647c478bd9Sstevel@tonic-gate 			int our_pri = real_priority(self);
5657c478bd9Sstevel@tonic-gate 			int his_pri = real_priority(ulwp);
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate 			if (rd_wr == WRITE_LOCK) {
5687c478bd9Sstevel@tonic-gate 				/*
5697c478bd9Sstevel@tonic-gate 				 * We defer to a queued thread that has
5707c478bd9Sstevel@tonic-gate 				 * a higher priority than ours.
5717c478bd9Sstevel@tonic-gate 				 */
5727c478bd9Sstevel@tonic-gate 				if (his_pri <= our_pri)
573d4204c85Sraf 					continue;	/* try again */
5747c478bd9Sstevel@tonic-gate 			} else {
5757c478bd9Sstevel@tonic-gate 				/*
5767c478bd9Sstevel@tonic-gate 				 * We defer to a queued thread that has
5777c478bd9Sstevel@tonic-gate 				 * a higher priority than ours or that
5787c478bd9Sstevel@tonic-gate 				 * is a writer whose priority equals ours.
5797c478bd9Sstevel@tonic-gate 				 */
5807c478bd9Sstevel@tonic-gate 				if (his_pri < our_pri ||
5817c478bd9Sstevel@tonic-gate 				    (his_pri == our_pri && !ulwp->ul_writer))
582d4204c85Sraf 					continue;	/* try again */
5837c478bd9Sstevel@tonic-gate 			}
5847c478bd9Sstevel@tonic-gate 		}
5857c478bd9Sstevel@tonic-gate 		/*
5867c478bd9Sstevel@tonic-gate 		 * We are about to block.
5877c478bd9Sstevel@tonic-gate 		 * If we're doing a trylock, return EBUSY instead.
5887c478bd9Sstevel@tonic-gate 		 */
5897c478bd9Sstevel@tonic-gate 		if (try_flag) {
5907c478bd9Sstevel@tonic-gate 			error = EBUSY;
5917c478bd9Sstevel@tonic-gate 			break;
5927c478bd9Sstevel@tonic-gate 		}
5937c478bd9Sstevel@tonic-gate 		/*
594d4204c85Sraf 		 * Enqueue writers ahead of readers.
5957c478bd9Sstevel@tonic-gate 		 */
5967c478bd9Sstevel@tonic-gate 		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
597d4204c85Sraf 		enqueue(qp, self, 0);
5987c478bd9Sstevel@tonic-gate 		set_parking_flag(self, 1);
5997c478bd9Sstevel@tonic-gate 		queue_unlock(qp);
6007c478bd9Sstevel@tonic-gate 		if ((error = __lwp_park(tsp, 0)) == EINTR)
601d4204c85Sraf 			error = ignore_waiters_flag = 0;
6027c478bd9Sstevel@tonic-gate 		set_parking_flag(self, 0);
6037c478bd9Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
604d4204c85Sraf 		if (self->ul_sleepq && dequeue_self(qp) == 0)
60541efec22Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
606d4204c85Sraf 		self->ul_writer = 0;
6077c478bd9Sstevel@tonic-gate 	}
6087c478bd9Sstevel@tonic-gate 
60941efec22Sraf 	queue_unlock(qp);
61041efec22Sraf 
61141efec22Sraf 	if (!try_flag) {
61241efec22Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
61341efec22Sraf 	}
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate 	return (error);
6167c478bd9Sstevel@tonic-gate }
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate int
6197c478bd9Sstevel@tonic-gate rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
6207c478bd9Sstevel@tonic-gate {
6217c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
6227c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
6237c478bd9Sstevel@tonic-gate 	readlock_t *readlockp;
6247c478bd9Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
6257c478bd9Sstevel@tonic-gate 	int error;
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate 	/*
6287c478bd9Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
6297c478bd9Sstevel@tonic-gate 	 * just increment our reference count and return.
6307c478bd9Sstevel@tonic-gate 	 */
63141efec22Sraf 	sigoff(self);
6327c478bd9Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
6337c478bd9Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
63441efec22Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
63541efec22Sraf 			sigon(self);
63641efec22Sraf 			error = EAGAIN;
63741efec22Sraf 			goto out;
6387c478bd9Sstevel@tonic-gate 		}
63941efec22Sraf 		sigon(self);
64041efec22Sraf 		error = 0;
64141efec22Sraf 		goto out;
64241efec22Sraf 	}
64341efec22Sraf 	sigon(self);
6447c478bd9Sstevel@tonic-gate 
6457c478bd9Sstevel@tonic-gate 	/*
6467c478bd9Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
6477c478bd9Sstevel@tonic-gate 	 */
6487257d1b4Sraf 	if (rw_write_held(rwlp)) {
6497c478bd9Sstevel@tonic-gate 		if (self->ul_error_detection)
6507c478bd9Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_rdlock",
6517c478bd9Sstevel@tonic-gate 			    "calling thread owns the writer lock");
65241efec22Sraf 		error = EDEADLK;
65341efec22Sraf 		goto out;
6547c478bd9Sstevel@tonic-gate 	}
6557c478bd9Sstevel@tonic-gate 
65641efec22Sraf 	if (read_lock_try(rwlp, 0))
65741efec22Sraf 		error = 0;
65841efec22Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
6597c478bd9Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
6607c478bd9Sstevel@tonic-gate 	else						/* user-level */
6617c478bd9Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, READ_LOCK);
6627c478bd9Sstevel@tonic-gate 
66341efec22Sraf out:
6647c478bd9Sstevel@tonic-gate 	if (error == 0) {
66541efec22Sraf 		sigoff(self);
66641efec22Sraf 		rwl_entry(rwlp)->rd_count++;
66741efec22Sraf 		sigon(self);
6687c478bd9Sstevel@tonic-gate 		if (rwsp)
6697c478bd9Sstevel@tonic-gate 			tdb_incr(rwsp->rw_rdlock);
67041efec22Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
67141efec22Sraf 	} else {
67241efec22Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
6737c478bd9Sstevel@tonic-gate 	}
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate 	return (error);
6767c478bd9Sstevel@tonic-gate }
6777c478bd9Sstevel@tonic-gate 
6787257d1b4Sraf #pragma weak pthread_rwlock_rdlock = rw_rdlock
6797257d1b4Sraf #pragma weak _rw_rdlock = rw_rdlock
6807c478bd9Sstevel@tonic-gate int
6817257d1b4Sraf rw_rdlock(rwlock_t *rwlp)
6827c478bd9Sstevel@tonic-gate {
6837c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
6847c478bd9Sstevel@tonic-gate 	return (rw_rdlock_impl(rwlp, NULL));
6857c478bd9Sstevel@tonic-gate }
6867c478bd9Sstevel@tonic-gate 
6877c478bd9Sstevel@tonic-gate void
6887c478bd9Sstevel@tonic-gate lrw_rdlock(rwlock_t *rwlp)
6897c478bd9Sstevel@tonic-gate {
6907c478bd9Sstevel@tonic-gate 	enter_critical(curthread);
6917c478bd9Sstevel@tonic-gate 	(void) rw_rdlock_impl(rwlp, NULL);
6927c478bd9Sstevel@tonic-gate }
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate int
6957257d1b4Sraf pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
6967257d1b4Sraf     const struct timespec *_RESTRICT_KYWD reltime)
6977c478bd9Sstevel@tonic-gate {
6987c478bd9Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
6997c478bd9Sstevel@tonic-gate 	int error;
7007c478bd9Sstevel@tonic-gate 
7017c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7027257d1b4Sraf 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
7037c478bd9Sstevel@tonic-gate 	if (error == ETIME)
7047c478bd9Sstevel@tonic-gate 		error = ETIMEDOUT;
7057c478bd9Sstevel@tonic-gate 	return (error);
7067c478bd9Sstevel@tonic-gate }
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate int
7097257d1b4Sraf pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
7107257d1b4Sraf     const struct timespec *_RESTRICT_KYWD abstime)
7117c478bd9Sstevel@tonic-gate {
7127c478bd9Sstevel@tonic-gate 	timespec_t tslocal;
7137c478bd9Sstevel@tonic-gate 	int error;
7147c478bd9Sstevel@tonic-gate 
7157c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7167c478bd9Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
7177257d1b4Sraf 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
7187c478bd9Sstevel@tonic-gate 	if (error == ETIME)
7197c478bd9Sstevel@tonic-gate 		error = ETIMEDOUT;
7207c478bd9Sstevel@tonic-gate 	return (error);
7217c478bd9Sstevel@tonic-gate }
7227c478bd9Sstevel@tonic-gate 
7237c478bd9Sstevel@tonic-gate int
7247c478bd9Sstevel@tonic-gate rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
7257c478bd9Sstevel@tonic-gate {
7267c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
7277c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
7287c478bd9Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
7297c478bd9Sstevel@tonic-gate 	int error;
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate 	/*
7327c478bd9Sstevel@tonic-gate 	 * If we hold a readers lock on this rwlock, bail out.
7337c478bd9Sstevel@tonic-gate 	 */
7347257d1b4Sraf 	if (rw_read_held(rwlp)) {
7357c478bd9Sstevel@tonic-gate 		if (self->ul_error_detection)
7367c478bd9Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7377c478bd9Sstevel@tonic-gate 			    "calling thread owns the readers lock");
73841efec22Sraf 		error = EDEADLK;
73941efec22Sraf 		goto out;
7407c478bd9Sstevel@tonic-gate 	}
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 	/*
7437c478bd9Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
7447c478bd9Sstevel@tonic-gate 	 */
7457257d1b4Sraf 	if (rw_write_held(rwlp)) {
7467c478bd9Sstevel@tonic-gate 		if (self->ul_error_detection)
7477c478bd9Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7487c478bd9Sstevel@tonic-gate 			    "calling thread owns the writer lock");
74941efec22Sraf 		error = EDEADLK;
75041efec22Sraf 		goto out;
7517c478bd9Sstevel@tonic-gate 	}
7527c478bd9Sstevel@tonic-gate 
75341efec22Sraf 	if (write_lock_try(rwlp, 0))
75441efec22Sraf 		error = 0;
75541efec22Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
7567c478bd9Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
75741efec22Sraf 	else						/* user-level */
7587c478bd9Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
7597c478bd9Sstevel@tonic-gate 
76041efec22Sraf out:
76141efec22Sraf 	if (error == 0) {
76241efec22Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
76341efec22Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
76441efec22Sraf 			rwlp->rwlock_ownerpid = udp->pid;
76541efec22Sraf 		if (rwsp) {
7667c478bd9Sstevel@tonic-gate 			tdb_incr(rwsp->rw_wrlock);
7677c478bd9Sstevel@tonic-gate 			rwsp->rw_wrlock_begin_hold = gethrtime();
7687c478bd9Sstevel@tonic-gate 		}
76941efec22Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
77041efec22Sraf 	} else {
77141efec22Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
77241efec22Sraf 	}
7737c478bd9Sstevel@tonic-gate 	return (error);
7747c478bd9Sstevel@tonic-gate }
7757c478bd9Sstevel@tonic-gate 
7767257d1b4Sraf #pragma weak pthread_rwlock_wrlock = rw_wrlock
7777257d1b4Sraf #pragma weak _rw_wrlock = rw_wrlock
7787c478bd9Sstevel@tonic-gate int
7797257d1b4Sraf rw_wrlock(rwlock_t *rwlp)
7807c478bd9Sstevel@tonic-gate {
7817c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7827c478bd9Sstevel@tonic-gate 	return (rw_wrlock_impl(rwlp, NULL));
7837c478bd9Sstevel@tonic-gate }
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate void
7867c478bd9Sstevel@tonic-gate lrw_wrlock(rwlock_t *rwlp)
7877c478bd9Sstevel@tonic-gate {
7887c478bd9Sstevel@tonic-gate 	enter_critical(curthread);
7897c478bd9Sstevel@tonic-gate 	(void) rw_wrlock_impl(rwlp, NULL);
7907c478bd9Sstevel@tonic-gate }
7917c478bd9Sstevel@tonic-gate 
7927c478bd9Sstevel@tonic-gate int
7937257d1b4Sraf pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
7947257d1b4Sraf     const struct timespec *_RESTRICT_KYWD reltime)
7957c478bd9Sstevel@tonic-gate {
7967c478bd9Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
7977c478bd9Sstevel@tonic-gate 	int error;
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8007257d1b4Sraf 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
8017c478bd9Sstevel@tonic-gate 	if (error == ETIME)
8027c478bd9Sstevel@tonic-gate 		error = ETIMEDOUT;
8037c478bd9Sstevel@tonic-gate 	return (error);
8047c478bd9Sstevel@tonic-gate }
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate int
8077257d1b4Sraf pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
8087c478bd9Sstevel@tonic-gate {
8097c478bd9Sstevel@tonic-gate 	timespec_t tslocal;
8107c478bd9Sstevel@tonic-gate 	int error;
8117c478bd9Sstevel@tonic-gate 
8127c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8137c478bd9Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
8147257d1b4Sraf 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
8157c478bd9Sstevel@tonic-gate 	if (error == ETIME)
8167c478bd9Sstevel@tonic-gate 		error = ETIMEDOUT;
8177c478bd9Sstevel@tonic-gate 	return (error);
8187c478bd9Sstevel@tonic-gate }
8197c478bd9Sstevel@tonic-gate 
8207257d1b4Sraf #pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
8217c478bd9Sstevel@tonic-gate int
8227257d1b4Sraf rw_tryrdlock(rwlock_t *rwlp)
8237c478bd9Sstevel@tonic-gate {
8247c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
8257c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8267c478bd9Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8277c478bd9Sstevel@tonic-gate 	readlock_t *readlockp;
8287c478bd9Sstevel@tonic-gate 	int error;
8297c478bd9Sstevel@tonic-gate 
8307c478bd9Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8317c478bd9Sstevel@tonic-gate 
8327c478bd9Sstevel@tonic-gate 	if (rwsp)
8337c478bd9Sstevel@tonic-gate 		tdb_incr(rwsp->rw_rdlock_try);
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate 	/*
8367c478bd9Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
8377c478bd9Sstevel@tonic-gate 	 * just increment our reference count and return.
8387c478bd9Sstevel@tonic-gate 	 */
83941efec22Sraf 	sigoff(self);
8407c478bd9Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
8417c478bd9Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
84241efec22Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
84341efec22Sraf 			sigon(self);
84441efec22Sraf 			error = EAGAIN;
84541efec22Sraf 			goto out;
8467c478bd9Sstevel@tonic-gate 		}
84741efec22Sraf 		sigon(self);
84841efec22Sraf 		error = 0;
84941efec22Sraf 		goto out;
85041efec22Sraf 	}
85141efec22Sraf 	sigon(self);
8527c478bd9Sstevel@tonic-gate 
85341efec22Sraf 	if (read_lock_try(rwlp, 0))
85441efec22Sraf 		error = 0;
85541efec22Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
8567c478bd9Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8577c478bd9Sstevel@tonic-gate 	else						/* user-level */
8587c478bd9Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8597c478bd9Sstevel@tonic-gate 
86041efec22Sraf out:
86141efec22Sraf 	if (error == 0) {
86241efec22Sraf 		sigoff(self);
86341efec22Sraf 		rwl_entry(rwlp)->rd_count++;
86441efec22Sraf 		sigon(self);
86541efec22Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
86641efec22Sraf 	} else {
86741efec22Sraf 		if (rwsp)
8687c478bd9Sstevel@tonic-gate 			tdb_incr(rwsp->rw_rdlock_try_fail);
86941efec22Sraf 		if (error != EBUSY) {
87041efec22Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
87141efec22Sraf 			    error);
87241efec22Sraf 		}
87341efec22Sraf 	}
8747c478bd9Sstevel@tonic-gate 
8757c478bd9Sstevel@tonic-gate 	return (error);
8767c478bd9Sstevel@tonic-gate }
8777c478bd9Sstevel@tonic-gate 
8787257d1b4Sraf #pragma weak pthread_rwlock_trywrlock = rw_trywrlock
8797c478bd9Sstevel@tonic-gate int
8807257d1b4Sraf rw_trywrlock(rwlock_t *rwlp)
8817c478bd9Sstevel@tonic-gate {
8827c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
8837c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8847c478bd9Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8857c478bd9Sstevel@tonic-gate 	int error;
8867c478bd9Sstevel@tonic-gate 
88741efec22Sraf 	ASSERT(!self->ul_critical || self->ul_bindflags);
8887c478bd9Sstevel@tonic-gate 
8897c478bd9Sstevel@tonic-gate 	if (rwsp)
8907c478bd9Sstevel@tonic-gate 		tdb_incr(rwsp->rw_wrlock_try);
8917c478bd9Sstevel@tonic-gate 
89241efec22Sraf 	if (write_lock_try(rwlp, 0))
89341efec22Sraf 		error = 0;
89441efec22Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
8957c478bd9Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
89641efec22Sraf 	else						/* user-level */
8977c478bd9Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
89841efec22Sraf 
89941efec22Sraf 	if (error == 0) {
90041efec22Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
90141efec22Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
90241efec22Sraf 			rwlp->rwlock_ownerpid = udp->pid;
90341efec22Sraf 		if (rwsp)
9047c478bd9Sstevel@tonic-gate 			rwsp->rw_wrlock_begin_hold = gethrtime();
90541efec22Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
90641efec22Sraf 	} else {
90741efec22Sraf 		if (rwsp)
90841efec22Sraf 			tdb_incr(rwsp->rw_wrlock_try_fail);
90941efec22Sraf 		if (error != EBUSY) {
91041efec22Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
91141efec22Sraf 			    error);
91241efec22Sraf 		}
9137c478bd9Sstevel@tonic-gate 	}
9147c478bd9Sstevel@tonic-gate 	return (error);
9157c478bd9Sstevel@tonic-gate }
9167c478bd9Sstevel@tonic-gate 
9177257d1b4Sraf #pragma weak pthread_rwlock_unlock = rw_unlock
9187257d1b4Sraf #pragma weak _rw_unlock = rw_unlock
9197c478bd9Sstevel@tonic-gate int
9207257d1b4Sraf rw_unlock(rwlock_t *rwlp)
9217c478bd9Sstevel@tonic-gate {
92241efec22Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
92341efec22Sraf 	uint32_t readers;
9247c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
9257c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
9267c478bd9Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp;
92741efec22Sraf 	queue_head_t *qp;
92841efec22Sraf 	int rd_wr;
92941efec22Sraf 	int waked = 0;
9307c478bd9Sstevel@tonic-gate 
93141efec22Sraf 	readers = *rwstate;
93241efec22Sraf 	ASSERT_CONSISTENT_STATE(readers);
93341efec22Sraf 	if (readers & URW_WRITE_LOCKED) {
93441efec22Sraf 		rd_wr = WRITE_LOCK;
93541efec22Sraf 		readers = 0;
93641efec22Sraf 	} else {
93741efec22Sraf 		rd_wr = READ_LOCK;
93841efec22Sraf 		readers &= URW_READERS_MASK;
9397c478bd9Sstevel@tonic-gate 	}
9407c478bd9Sstevel@tonic-gate 
94141efec22Sraf 	if (rd_wr == WRITE_LOCK) {
9427c478bd9Sstevel@tonic-gate 		/*
9437c478bd9Sstevel@tonic-gate 		 * Since the writer lock is held, we'd better be
9447c478bd9Sstevel@tonic-gate 		 * holding it, else we cannot legitimately be here.
9457c478bd9Sstevel@tonic-gate 		 */
9467257d1b4Sraf 		if (!rw_write_held(rwlp)) {
9477c478bd9Sstevel@tonic-gate 			if (self->ul_error_detection)
9487c478bd9Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9497c478bd9Sstevel@tonic-gate 				    "writer lock held, "
9507c478bd9Sstevel@tonic-gate 				    "but not by the calling thread");
9517c478bd9Sstevel@tonic-gate 			return (EPERM);
9527c478bd9Sstevel@tonic-gate 		}
9537c478bd9Sstevel@tonic-gate 		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
9547c478bd9Sstevel@tonic-gate 			if (rwsp->rw_wrlock_begin_hold)
9557c478bd9Sstevel@tonic-gate 				rwsp->rw_wrlock_hold_time +=
9567c478bd9Sstevel@tonic-gate 				    gethrtime() - rwsp->rw_wrlock_begin_hold;
9577c478bd9Sstevel@tonic-gate 			rwsp->rw_wrlock_begin_hold = 0;
9587c478bd9Sstevel@tonic-gate 		}
95941efec22Sraf 		rwlp->rwlock_owner = 0;
96041efec22Sraf 		rwlp->rwlock_ownerpid = 0;
96141efec22Sraf 	} else if (readers > 0) {
9627c478bd9Sstevel@tonic-gate 		/*
9637c478bd9Sstevel@tonic-gate 		 * A readers lock is held; if we don't hold one, bail out.
9647c478bd9Sstevel@tonic-gate 		 */
96541efec22Sraf 		readlock_t *readlockp;
96641efec22Sraf 
96741efec22Sraf 		sigoff(self);
96841efec22Sraf 		readlockp = rwl_entry(rwlp);
9697c478bd9Sstevel@tonic-gate 		if (readlockp->rd_count == 0) {
97041efec22Sraf 			sigon(self);
9717c478bd9Sstevel@tonic-gate 			if (self->ul_error_detection)
9727c478bd9Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9737c478bd9Sstevel@tonic-gate 				    "readers lock held, "
9747c478bd9Sstevel@tonic-gate 				    "but not by the calling thread");
9757c478bd9Sstevel@tonic-gate 			return (EPERM);
9767c478bd9Sstevel@tonic-gate 		}
9777c478bd9Sstevel@tonic-gate 		/*
9787c478bd9Sstevel@tonic-gate 		 * If we hold more than one readers lock on this rwlock,
9797c478bd9Sstevel@tonic-gate 		 * just decrement our reference count and return.
9807c478bd9Sstevel@tonic-gate 		 */
9817c478bd9Sstevel@tonic-gate 		if (--readlockp->rd_count != 0) {
98241efec22Sraf 			sigon(self);
98341efec22Sraf 			goto out;
9847c478bd9Sstevel@tonic-gate 		}
98541efec22Sraf 		sigon(self);
9867c478bd9Sstevel@tonic-gate 	} else {
9877c478bd9Sstevel@tonic-gate 		/*
9887c478bd9Sstevel@tonic-gate 		 * This is a usage error.
9897c478bd9Sstevel@tonic-gate 		 * No thread should release an unowned lock.
9907c478bd9Sstevel@tonic-gate 		 */
9917c478bd9Sstevel@tonic-gate 		if (self->ul_error_detection)
9927c478bd9Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
9937c478bd9Sstevel@tonic-gate 		return (EPERM);
9947c478bd9Sstevel@tonic-gate 	}
9957c478bd9Sstevel@tonic-gate 
99641efec22Sraf 	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
99741efec22Sraf 		/* EMPTY */;
99841efec22Sraf 	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
99941efec22Sraf 		/* EMPTY */;
100041efec22Sraf 	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
10018cd45542Sraf 		(void) mutex_lock(&rwlp->mutex);
100241efec22Sraf 		(void) __lwp_rwlock_unlock(rwlp);
10038cd45542Sraf 		(void) mutex_unlock(&rwlp->mutex);
100441efec22Sraf 		waked = 1;
10057c478bd9Sstevel@tonic-gate 	} else {
10067c478bd9Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
100741efec22Sraf 		if (rd_wr == READ_LOCK)
100841efec22Sraf 			atomic_dec_32(rwstate);
100941efec22Sraf 		else
101041efec22Sraf 			atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
10117c478bd9Sstevel@tonic-gate 		waked = rw_queue_release(qp, rwlp);
10127c478bd9Sstevel@tonic-gate 	}
10137c478bd9Sstevel@tonic-gate 
101441efec22Sraf out:
101541efec22Sraf 	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
101641efec22Sraf 
10177c478bd9Sstevel@tonic-gate 	/*
10187c478bd9Sstevel@tonic-gate 	 * Yield to the thread we just waked up, just in case we might
10197c478bd9Sstevel@tonic-gate 	 * be about to grab the rwlock again immediately upon return.
10207c478bd9Sstevel@tonic-gate 	 * This is pretty weak but it helps on a uniprocessor and also
10217c478bd9Sstevel@tonic-gate 	 * when cpu affinity has assigned both ourself and the other
10227c478bd9Sstevel@tonic-gate 	 * thread to the same CPU.  Note that lwp_yield() will yield
10237c478bd9Sstevel@tonic-gate 	 * the processor only if the writer is at the same or higher
10247c478bd9Sstevel@tonic-gate 	 * priority than ourself.  This provides more balanced program
10257c478bd9Sstevel@tonic-gate 	 * behavior; it doesn't guarantee acquisition of the lock by
10267c478bd9Sstevel@tonic-gate 	 * the pending writer.
10277c478bd9Sstevel@tonic-gate 	 */
10287c478bd9Sstevel@tonic-gate 	if (waked)
10298cd45542Sraf 		yield();
10307c478bd9Sstevel@tonic-gate 	return (0);
10317c478bd9Sstevel@tonic-gate }
10327c478bd9Sstevel@tonic-gate 
10337c478bd9Sstevel@tonic-gate void
10347c478bd9Sstevel@tonic-gate lrw_unlock(rwlock_t *rwlp)
10357c478bd9Sstevel@tonic-gate {
10367257d1b4Sraf 	(void) rw_unlock(rwlp);
10377c478bd9Sstevel@tonic-gate 	exit_critical(curthread);
10387c478bd9Sstevel@tonic-gate }
1039