xref: /titanic_52/usr/src/lib/libc/port/threads/rwlock.c (revision c7158ae983f5a04c4a998f468ecefba6d23ba721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <sys/sdt.h>
32 
/*
 * Lock-type codes passed as the rd_wr argument of shared_rwlock_lock()
 * and rwlock_lock().  TRY_FLAG is OR-ed into the base type to request a
 * non-blocking attempt; both functions mask it off again before checking
 * that the remainder is READ_LOCK or WRITE_LOCK.  rwlock_lock() also
 * stores the base type directly into ul_writer, so READ_LOCK and
 * WRITE_LOCK must stay 0 and 1 respectively.
 */
#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

/*
 * Sanity check on a snapshot of the rwlock state word:  if the
 * URW_WRITE_LOCKED bit is set, then URW_HAS_WAITERS is the only other
 * bit allowed to be set (in particular, no reader count may be present).
 */
#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
		((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
44 
45 /*
46  * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
47  * We must be deferring signals for this to be safe.
48  * Else if we are returning an entry with ul_rdlockcnt == 0,
49  * it could be reassigned behind our back in a signal handler.
50  */
51 static readlock_t *
52 rwl_entry(rwlock_t *rwlp)
53 {
54 	ulwp_t *self = curthread;
55 	readlock_t *remembered = NULL;
56 	readlock_t *readlockp;
57 	uint_t nlocks;
58 
59 	/* we must be deferring signals */
60 	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);
61 
62 	if ((nlocks = self->ul_rdlockcnt) != 0)
63 		readlockp = self->ul_readlock.array;
64 	else {
65 		nlocks = 1;
66 		readlockp = &self->ul_readlock.single;
67 	}
68 
69 	for (; nlocks; nlocks--, readlockp++) {
70 		if (readlockp->rd_rwlock == rwlp)
71 			return (readlockp);
72 		if (readlockp->rd_count == 0 && remembered == NULL)
73 			remembered = readlockp;
74 	}
75 	if (remembered != NULL) {
76 		remembered->rd_rwlock = rwlp;
77 		return (remembered);
78 	}
79 
80 	/*
81 	 * No entry available.  Allocate more space, converting the single
82 	 * readlock_t entry into an array of readlock_t entries if necessary.
83 	 */
84 	if ((nlocks = self->ul_rdlockcnt) == 0) {
85 		/*
86 		 * Initial allocation of the readlock_t array.
87 		 * Convert the single entry into an array.
88 		 */
89 		self->ul_rdlockcnt = nlocks = NLOCKS;
90 		readlockp = lmalloc(nlocks * sizeof (readlock_t));
91 		/*
92 		 * The single readlock_t becomes the first entry in the array.
93 		 */
94 		*readlockp = self->ul_readlock.single;
95 		self->ul_readlock.single.rd_count = 0;
96 		self->ul_readlock.array = readlockp;
97 		/*
98 		 * Return the next available entry in the array.
99 		 */
100 		(++readlockp)->rd_rwlock = rwlp;
101 		return (readlockp);
102 	}
103 	/*
104 	 * Reallocate the array, double the size each time.
105 	 */
106 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
107 	(void) memcpy(readlockp, self->ul_readlock.array,
108 	    nlocks * sizeof (readlock_t));
109 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
110 	self->ul_readlock.array = readlockp;
111 	self->ul_rdlockcnt *= 2;
112 	/*
113 	 * Return the next available entry in the newly allocated array.
114 	 */
115 	(readlockp += nlocks)->rd_rwlock = rwlp;
116 	return (readlockp);
117 }
118 
119 /*
120  * Free the array of rwlocks held for reading.
121  */
122 void
123 rwl_free(ulwp_t *ulwp)
124 {
125 	uint_t nlocks;
126 
127 	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
128 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
129 	ulwp->ul_rdlockcnt = 0;
130 	ulwp->ul_readlock.single.rd_rwlock = NULL;
131 	ulwp->ul_readlock.single.rd_count = 0;
132 }
133 
134 /*
135  * Check if a reader version of the lock is held by the current thread.
136  * rw_read_is_held() is private to libc.
137  */
138 #pragma weak rw_read_is_held = _rw_read_held
139 #pragma weak rw_read_held = _rw_read_held
140 int
141 _rw_read_held(rwlock_t *rwlp)
142 {
143 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
144 	uint32_t readers;
145 	ulwp_t *self = curthread;
146 	readlock_t *readlockp;
147 	uint_t nlocks;
148 	int rval = 0;
149 
150 	no_preempt(self);
151 
152 	readers = *rwstate;
153 	ASSERT_CONSISTENT_STATE(readers);
154 	if (!(readers & URW_WRITE_LOCKED) &&
155 	    (readers & URW_READERS_MASK) != 0) {
156 		/*
157 		 * The lock is held for reading by some thread.
158 		 * Search our array of rwlocks held for reading for a match.
159 		 */
160 		if ((nlocks = self->ul_rdlockcnt) != 0)
161 			readlockp = self->ul_readlock.array;
162 		else {
163 			nlocks = 1;
164 			readlockp = &self->ul_readlock.single;
165 		}
166 		for (; nlocks; nlocks--, readlockp++) {
167 			if (readlockp->rd_rwlock == rwlp) {
168 				if (readlockp->rd_count)
169 					rval = 1;
170 				break;
171 			}
172 		}
173 	}
174 
175 	preempt(self);
176 	return (rval);
177 }
178 
179 /*
180  * Check if a writer version of the lock is held by the current thread.
181  * rw_write_is_held() is private to libc.
182  */
183 #pragma weak rw_write_is_held = _rw_write_held
184 #pragma weak rw_write_held = _rw_write_held
185 int
186 _rw_write_held(rwlock_t *rwlp)
187 {
188 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
189 	uint32_t readers;
190 	ulwp_t *self = curthread;
191 	int rval;
192 
193 	no_preempt(self);
194 
195 	readers = *rwstate;
196 	ASSERT_CONSISTENT_STATE(readers);
197 	rval = ((readers & URW_WRITE_LOCKED) &&
198 	    rwlp->rwlock_owner == (uintptr_t)self &&
199 	    (rwlp->rwlock_type == USYNC_THREAD ||
200 	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));
201 
202 	preempt(self);
203 	return (rval);
204 }
205 
206 #pragma weak rwlock_init = __rwlock_init
207 #pragma weak _rwlock_init = __rwlock_init
208 /* ARGSUSED2 */
209 int
210 __rwlock_init(rwlock_t *rwlp, int type, void *arg)
211 {
212 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
213 		return (EINVAL);
214 	/*
215 	 * Once reinitialized, we can no longer be holding a read or write lock.
216 	 * We can do nothing about other threads that are holding read locks.
217 	 */
218 	sigoff(curthread);
219 	rwl_entry(rwlp)->rd_count = 0;
220 	sigon(curthread);
221 	(void) memset(rwlp, 0, sizeof (*rwlp));
222 	rwlp->rwlock_type = (uint16_t)type;
223 	rwlp->rwlock_magic = RWL_MAGIC;
224 	rwlp->mutex.mutex_type = (uint8_t)type;
225 	rwlp->mutex.mutex_flag = LOCK_INITED;
226 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
227 	return (0);
228 }
229 
230 #pragma weak rwlock_destroy = __rwlock_destroy
231 #pragma weak _rwlock_destroy = __rwlock_destroy
232 #pragma weak pthread_rwlock_destroy = __rwlock_destroy
233 #pragma weak _pthread_rwlock_destroy = __rwlock_destroy
234 int
235 __rwlock_destroy(rwlock_t *rwlp)
236 {
237 	/*
238 	 * Once destroyed, we can no longer be holding a read or write lock.
239 	 * We can do nothing about other threads that are holding read locks.
240 	 */
241 	sigoff(curthread);
242 	rwl_entry(rwlp)->rd_count = 0;
243 	sigon(curthread);
244 	rwlp->rwlock_magic = 0;
245 	tdb_sync_obj_deregister(rwlp);
246 	return (0);
247 }
248 
249 /*
250  * Attempt to acquire a readers lock.  Return true on success.
251  */
252 static int
253 read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
254 {
255 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
256 	uint32_t mask = ignore_waiters_flag?
257 	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
258 	uint32_t readers;
259 	ulwp_t *self = curthread;
260 
261 	no_preempt(self);
262 	while (((readers = *rwstate) & mask) == 0) {
263 		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
264 			preempt(self);
265 			return (1);
266 		}
267 	}
268 	preempt(self);
269 	return (0);
270 }
271 
272 /*
273  * Attempt to release a reader lock.  Return true on success.
274  */
275 static int
276 read_unlock_try(rwlock_t *rwlp)
277 {
278 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
279 	uint32_t readers;
280 	ulwp_t *self = curthread;
281 
282 	no_preempt(self);
283 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
284 		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
285 			preempt(self);
286 			return (1);
287 		}
288 	}
289 	preempt(self);
290 	return (0);
291 }
292 
293 /*
294  * Attempt to acquire a writer lock.  Return true on success.
295  */
296 static int
297 write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
298 {
299 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
300 	uint32_t mask = ignore_waiters_flag?
301 	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
302 	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
303 	ulwp_t *self = curthread;
304 	uint32_t readers;
305 
306 	no_preempt(self);
307 	while (((readers = *rwstate) & mask) == 0) {
308 		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
309 		    == readers) {
310 			preempt(self);
311 			return (1);
312 		}
313 	}
314 	preempt(self);
315 	return (0);
316 }
317 
318 /*
319  * Attempt to release a writer lock.  Return true on success.
320  */
321 static int
322 write_unlock_try(rwlock_t *rwlp)
323 {
324 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
325 	uint32_t readers;
326 	ulwp_t *self = curthread;
327 
328 	no_preempt(self);
329 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
330 		if (atomic_cas_32(rwstate, readers, 0) == readers) {
331 			preempt(self);
332 			return (1);
333 		}
334 	}
335 	preempt(self);
336 	return (0);
337 }
338 
/*
 * Wake up thread(s) sleeping on the rwlock queue and then
 * drop the queue lock.  Return non-zero if we wake up someone.
 * This is called when a thread releases a lock that appears to have waiters.
 *
 * qp is the sleep queue for rwlp; every path through this function
 * calls queue_unlock(qp) exactly once before returning.
 */
static int
rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	uint32_t writers;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	/* nothing to do if the state word shows no waiters */
	if (!(readers & URW_HAS_WAITERS)) {
		queue_unlock(qp);
		return (0);
	}
	/*
	 * From here on 'readers' starts as the reader count currently in
	 * the state word and is incremented for each reader chosen below;
	 * 'writers' counts the writers chosen (at most one).
	 */
	readers &= URW_READERS_MASK;
	writers = 0;

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			/*
			 * A writer can only be woken if nobody else has
			 * been chosen and no reader holds the lock.
			 */
			if (writers != 0 || readers != 0)
				break;
			/* one writer to wake */
			writers++;
		} else {
			/* a reader cannot be woken along with a writer */
			if (writers != 0)
				break;
			/* at least one reader to wake */
			readers++;
			/* grow the lwpid buffer before it overflows */
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		/* take the chosen thread off the queue and remember its id */
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}
	/* the loop ran off the end of the queue:  no waiters remain */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		/*
		 * Drop the queue lock first, then unpark the chosen
		 * threads, all between no_preempt() and preempt().
		 */
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	/* release the overflow buffer if alloc_lwpids() replaced ours */
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
	return (nlwpid != 0);
}
425 
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
 * released, and if they need to sleep will release the mutex first. In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 *
 * rwlp is the lock, tsp is the timeout handed to the blocking kernel
 * calls (callers in this file pass NULL for untimed and try requests),
 * and rd_wr is READ_LOCK or WRITE_LOCK, optionally OR-ed with TRY_FLAG.
 * Returns 0 on success or an error number.
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	uint32_t readers;
	int try_flag;
	int error;

	/* split the request into its try bit and its base lock type */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		/* a try request fails at once if a writer holds the lock */
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		/*
		 * With the mutex held, make one more user-level attempt.
		 * On success error is still 0 from mutex_lock() above.
		 */
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		/* advertise a waiter before handing off to the kernel */
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.
		 */
		mp->mutex_owner = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	/* start over after a spurious wakeup (EAGAIN) or an interrupt */
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}
503 
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 *
 * rwlp is the lock, tsp is the timeout handed to __lwp_park() (callers
 * in this file pass NULL for untimed and try requests), and rd_wr is
 * READ_LOCK or WRITE_LOCK, optionally OR-ed with TRY_FLAG.
 * Returns 0 on success, EBUSY when a try request would have to block,
 * or the error returned by __lwp_park().
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	/* split the request into its try bit and its base lock type */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	/* the queue lock is held for every pass except while parked */
	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			/* we set the waiters bit ourselves; undo that */
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			continue;	/* no queued waiters, try again */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to him or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri)
					continue;	/* try again */
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer))
					continue;	/* try again */
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/*
		 * After an interrupted park, clear the error so the loop
		 * runs again, and go back to honoring the waiters bit.
		 */
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = ignore_waiters_flag = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		/*
		 * If we are still on the sleep queue, take ourself off.
		 * A zero return from dequeue_self() leads to clearing
		 * the waiters bit.
		 */
		if (self->ul_sleepq && dequeue_self(qp) == 0)
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
		self->ul_writer = 0;
	}

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}
610 
611 int
612 rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
613 {
614 	ulwp_t *self = curthread;
615 	uberdata_t *udp = self->ul_uberdata;
616 	readlock_t *readlockp;
617 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
618 	int error;
619 
620 	/*
621 	 * If we already hold a readers lock on this rwlock,
622 	 * just increment our reference count and return.
623 	 */
624 	sigoff(self);
625 	readlockp = rwl_entry(rwlp);
626 	if (readlockp->rd_count != 0) {
627 		if (readlockp->rd_count == READ_LOCK_MAX) {
628 			sigon(self);
629 			error = EAGAIN;
630 			goto out;
631 		}
632 		sigon(self);
633 		error = 0;
634 		goto out;
635 	}
636 	sigon(self);
637 
638 	/*
639 	 * If we hold the writer lock, bail out.
640 	 */
641 	if (rw_write_is_held(rwlp)) {
642 		if (self->ul_error_detection)
643 			rwlock_error(rwlp, "rwlock_rdlock",
644 			    "calling thread owns the writer lock");
645 		error = EDEADLK;
646 		goto out;
647 	}
648 
649 	if (read_lock_try(rwlp, 0))
650 		error = 0;
651 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
652 		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
653 	else						/* user-level */
654 		error = rwlock_lock(rwlp, tsp, READ_LOCK);
655 
656 out:
657 	if (error == 0) {
658 		sigoff(self);
659 		rwl_entry(rwlp)->rd_count++;
660 		sigon(self);
661 		if (rwsp)
662 			tdb_incr(rwsp->rw_rdlock);
663 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
664 	} else {
665 		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
666 	}
667 
668 	return (error);
669 }
670 
671 #pragma weak rw_rdlock = __rw_rdlock
672 #pragma weak _rw_rdlock = __rw_rdlock
673 #pragma weak pthread_rwlock_rdlock = __rw_rdlock
674 #pragma weak _pthread_rwlock_rdlock = __rw_rdlock
675 int
676 __rw_rdlock(rwlock_t *rwlp)
677 {
678 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
679 	return (rw_rdlock_impl(rwlp, NULL));
680 }
681 
682 void
683 lrw_rdlock(rwlock_t *rwlp)
684 {
685 	enter_critical(curthread);
686 	(void) rw_rdlock_impl(rwlp, NULL);
687 }
688 
689 #pragma weak pthread_rwlock_reltimedrdlock_np = \
690 	_pthread_rwlock_reltimedrdlock_np
691 int
692 _pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime)
693 {
694 	timespec_t tslocal = *reltime;
695 	int error;
696 
697 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
698 	error = rw_rdlock_impl(rwlp, &tslocal);
699 	if (error == ETIME)
700 		error = ETIMEDOUT;
701 	return (error);
702 }
703 
704 #pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock
705 int
706 _pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime)
707 {
708 	timespec_t tslocal;
709 	int error;
710 
711 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
712 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
713 	error = rw_rdlock_impl(rwlp, &tslocal);
714 	if (error == ETIME)
715 		error = ETIMEDOUT;
716 	return (error);
717 }
718 
719 int
720 rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
721 {
722 	ulwp_t *self = curthread;
723 	uberdata_t *udp = self->ul_uberdata;
724 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
725 	int error;
726 
727 	/*
728 	 * If we hold a readers lock on this rwlock, bail out.
729 	 */
730 	if (rw_read_is_held(rwlp)) {
731 		if (self->ul_error_detection)
732 			rwlock_error(rwlp, "rwlock_wrlock",
733 			    "calling thread owns the readers lock");
734 		error = EDEADLK;
735 		goto out;
736 	}
737 
738 	/*
739 	 * If we hold the writer lock, bail out.
740 	 */
741 	if (rw_write_is_held(rwlp)) {
742 		if (self->ul_error_detection)
743 			rwlock_error(rwlp, "rwlock_wrlock",
744 			    "calling thread owns the writer lock");
745 		error = EDEADLK;
746 		goto out;
747 	}
748 
749 	if (write_lock_try(rwlp, 0))
750 		error = 0;
751 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
752 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
753 	else						/* user-level */
754 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
755 
756 out:
757 	if (error == 0) {
758 		rwlp->rwlock_owner = (uintptr_t)self;
759 		if (rwlp->rwlock_type == USYNC_PROCESS)
760 			rwlp->rwlock_ownerpid = udp->pid;
761 		if (rwsp) {
762 			tdb_incr(rwsp->rw_wrlock);
763 			rwsp->rw_wrlock_begin_hold = gethrtime();
764 		}
765 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
766 	} else {
767 		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
768 	}
769 	return (error);
770 }
771 
772 #pragma weak rw_wrlock = __rw_wrlock
773 #pragma weak _rw_wrlock = __rw_wrlock
774 #pragma weak pthread_rwlock_wrlock = __rw_wrlock
775 #pragma weak _pthread_rwlock_wrlock = __rw_wrlock
776 int
777 __rw_wrlock(rwlock_t *rwlp)
778 {
779 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
780 	return (rw_wrlock_impl(rwlp, NULL));
781 }
782 
783 void
784 lrw_wrlock(rwlock_t *rwlp)
785 {
786 	enter_critical(curthread);
787 	(void) rw_wrlock_impl(rwlp, NULL);
788 }
789 
790 #pragma weak pthread_rwlock_reltimedwrlock_np = \
791 	_pthread_rwlock_reltimedwrlock_np
792 int
793 _pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime)
794 {
795 	timespec_t tslocal = *reltime;
796 	int error;
797 
798 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
799 	error = rw_wrlock_impl(rwlp, &tslocal);
800 	if (error == ETIME)
801 		error = ETIMEDOUT;
802 	return (error);
803 }
804 
805 #pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock
806 int
807 _pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime)
808 {
809 	timespec_t tslocal;
810 	int error;
811 
812 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
813 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
814 	error = rw_wrlock_impl(rwlp, &tslocal);
815 	if (error == ETIME)
816 		error = ETIMEDOUT;
817 	return (error);
818 }
819 
820 #pragma weak rw_tryrdlock = __rw_tryrdlock
821 #pragma weak _rw_tryrdlock = __rw_tryrdlock
822 #pragma weak pthread_rwlock_tryrdlock = __rw_tryrdlock
823 #pragma weak _pthread_rwlock_tryrdlock = __rw_tryrdlock
824 int
825 __rw_tryrdlock(rwlock_t *rwlp)
826 {
827 	ulwp_t *self = curthread;
828 	uberdata_t *udp = self->ul_uberdata;
829 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
830 	readlock_t *readlockp;
831 	int error;
832 
833 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
834 
835 	if (rwsp)
836 		tdb_incr(rwsp->rw_rdlock_try);
837 
838 	/*
839 	 * If we already hold a readers lock on this rwlock,
840 	 * just increment our reference count and return.
841 	 */
842 	sigoff(self);
843 	readlockp = rwl_entry(rwlp);
844 	if (readlockp->rd_count != 0) {
845 		if (readlockp->rd_count == READ_LOCK_MAX) {
846 			sigon(self);
847 			error = EAGAIN;
848 			goto out;
849 		}
850 		sigon(self);
851 		error = 0;
852 		goto out;
853 	}
854 	sigon(self);
855 
856 	if (read_lock_try(rwlp, 0))
857 		error = 0;
858 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
859 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
860 	else						/* user-level */
861 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
862 
863 out:
864 	if (error == 0) {
865 		sigoff(self);
866 		rwl_entry(rwlp)->rd_count++;
867 		sigon(self);
868 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
869 	} else {
870 		if (rwsp)
871 			tdb_incr(rwsp->rw_rdlock_try_fail);
872 		if (error != EBUSY) {
873 			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
874 			    error);
875 		}
876 	}
877 
878 	return (error);
879 }
880 
881 #pragma weak rw_trywrlock = __rw_trywrlock
882 #pragma weak _rw_trywrlock = __rw_trywrlock
883 #pragma weak pthread_rwlock_trywrlock = __rw_trywrlock
884 #pragma weak _pthread_rwlock_trywrlock = __rw_trywrlock
885 int
886 __rw_trywrlock(rwlock_t *rwlp)
887 {
888 	ulwp_t *self = curthread;
889 	uberdata_t *udp = self->ul_uberdata;
890 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
891 	int error;
892 
893 	ASSERT(!self->ul_critical || self->ul_bindflags);
894 
895 	if (rwsp)
896 		tdb_incr(rwsp->rw_wrlock_try);
897 
898 	if (write_lock_try(rwlp, 0))
899 		error = 0;
900 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
901 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
902 	else						/* user-level */
903 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
904 
905 	if (error == 0) {
906 		rwlp->rwlock_owner = (uintptr_t)self;
907 		if (rwlp->rwlock_type == USYNC_PROCESS)
908 			rwlp->rwlock_ownerpid = udp->pid;
909 		if (rwsp)
910 			rwsp->rw_wrlock_begin_hold = gethrtime();
911 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
912 	} else {
913 		if (rwsp)
914 			tdb_incr(rwsp->rw_wrlock_try_fail);
915 		if (error != EBUSY) {
916 			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
917 			    error);
918 		}
919 	}
920 	return (error);
921 }
922 
#pragma weak rw_unlock = __rw_unlock
#pragma weak _rw_unlock = __rw_unlock
#pragma weak pthread_rwlock_unlock = __rw_unlock
#pragma weak _pthread_rwlock_unlock = __rw_unlock
/*
 * Release the calling thread's hold (read or write) on rwlp.
 *
 * Returns 0 on success.  Returns EPERM if the lock is write-locked by
 * someone other than the calling thread, if it is read-locked but the
 * calling thread has no read hold recorded, or if it is not locked at
 * all.  When the thread's ul_error_detection flag is set, rwlock_error()
 * is also called for each of these misuse cases.
 */
int
__rw_unlock(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	queue_head_t *qp;
	int rd_wr;
	int waked = 0;

	/*
	 * Decide from a snapshot of the state word whether this is a
	 * write unlock or a read unlock.  For a read unlock, 'readers'
	 * keeps the reader count from the snapshot.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED) {
		rd_wr = WRITE_LOCK;
		readers = 0;
	} else {
		rd_wr = READ_LOCK;
		readers &= URW_READERS_MASK;
	}

	if (rd_wr == WRITE_LOCK) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_is_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/* accumulate the write hold time, if statistics are kept */
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
		/* give up ownership before the lock itself is released */
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	} else if (readers > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp;

		sigoff(self);
		readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			sigon(self);
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			sigon(self);
			goto out;
		}
		sigon(self);
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

	/*
	 * Release the lock itself.  Try the lock-free fast path first;
	 * it fails when the state word shows waiters.  In that case a
	 * process-shared lock is released by the kernel with the rwlock's
	 * mutex held, and a process-private lock is released under the
	 * sleep queue lock, which rw_queue_release() drops for us.
	 */
	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
		(void) mutex_lock(&rwlp->mutex);
		(void) __lwp_rwlock_unlock(rwlp);
		(void) mutex_unlock(&rwlp->mutex);
		/* always treated as having waked someone */
		waked = 1;
	} else {
		qp = queue_lock(rwlp, MX);
		if (rd_wr == READ_LOCK)
			atomic_dec_32(rwstate);
		else
			atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
		waked = rw_queue_release(qp, rwlp);
	}

out:
	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);

	/*
	 * Yield to the thread we just waked up, just in case we might
	 * be about to grab the rwlock again immediately upon return.
	 * This is pretty weak but it helps on a uniprocessor and also
	 * when cpu affinity has assigned both ourself and the other
	 * thread to the same CPU.  Note that lwp_yield() will yield
	 * the processor only if the writer is at the same or higher
	 * priority than ourself.  This provides more balanced program
	 * behavior; it doesn't guarantee acquisition of the lock by
	 * the pending writer.
	 */
	if (waked)
		yield();
	return (0);
}
1040 
1041 void
1042 lrw_unlock(rwlock_t *rwlp)
1043 {
1044 	(void) __rw_unlock(rwlp);
1045 	exit_critical(curthread);
1046 }
1047