xref: /titanic_52/usr/src/lib/libc/port/threads/synch.c (revision 9db67a327daf1243e630c20b81978ffd2a7baad7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <sys/rtpriocntl.h>
32 #include <sys/sdt.h>
33 #include <atomic.h>
34 
/*
 * Statistics-counter helpers.  In THREAD_DEBUG builds they update the
 * counters; in all other builds they expand to nothing.
 *
 *	INCR32(x)	increment x unless it already equals UINT32_MAX
 *	INCR(x)		increment x
 *	DECR(x)		decrement x
 *	MAXINCR(m, x)	increment x, then raise the high-water mark m
 *			to x if x now exceeds m
 *
 * Arguments may be evaluated more than once (debug) or not at all
 * (non-debug), so they must be free of side effects.  Every argument
 * is parenthesized in the expansion so that compound expressions can
 * be passed safely.
 */
#if defined(THREAD_DEBUG)
#define	INCR32(x)	(((x) != UINT32_MAX)? (x)++ : 0)
#define	INCR(x)		((x)++)
#define	DECR(x)		((x)--)
#define	MAXINCR(m, x)	(((m) < ++(x))? ((m) = (x)) : 0)
#else
#define	INCR32(x)
#define	INCR(x)
#define	DECR(x)
#define	MAXINCR(m, x)
#endif
46 
47 /*
48  * This mutex is initialized to be held by lwp#1.
49  * It is used to block a thread that has returned from a mutex_lock()
50  * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
51  */
52 mutex_t	stall_mutex = DEFAULTMUTEX;
53 
54 static int shared_mutex_held(mutex_t *);
55 static int mutex_queuelock_adaptive(mutex_t *);
56 static void mutex_wakeup_all(mutex_t *);
57 
58 /*
59  * Lock statistics support functions.
60  */
61 void
62 record_begin_hold(tdb_mutex_stats_t *msp)
63 {
64 	tdb_incr(msp->mutex_lock);
65 	msp->mutex_begin_hold = gethrtime();
66 }
67 
68 hrtime_t
69 record_hold_time(tdb_mutex_stats_t *msp)
70 {
71 	hrtime_t now = gethrtime();
72 
73 	if (msp->mutex_begin_hold)
74 		msp->mutex_hold_time += now - msp->mutex_begin_hold;
75 	msp->mutex_begin_hold = 0;
76 	return (now);
77 }
78 
79 /*
80  * Called once at library initialization.
81  */
82 void
83 mutex_setup(void)
84 {
85 	if (set_lock_byte(&stall_mutex.mutex_lockw))
86 		thr_panic("mutex_setup() cannot acquire stall_mutex");
87 	stall_mutex.mutex_owner = (uintptr_t)curthread;
88 }
89 
90 /*
91  * The default spin count of 1000 is experimentally determined.
92  * On sun4u machines with any number of processors it could be raised
93  * to 10,000 but that (experimentally) makes almost no difference.
94  * The environment variable:
95  *	_THREAD_ADAPTIVE_SPIN=count
96  * can be used to override and set the count in the range [0 .. 1,000,000].
97  */
98 int	thread_adaptive_spin = 1000;
99 uint_t	thread_max_spinners = 100;
100 int	thread_queue_verify = 0;
101 static	int	ncpus;
102 
103 /*
104  * Distinguish spinning for queue locks from spinning for regular locks.
105  * We try harder to acquire queue locks by spinning.
106  * The environment variable:
107  *	_THREAD_QUEUE_SPIN=count
108  * can be used to override and set the count in the range [0 .. 1,000,000].
109  */
110 int	thread_queue_spin = 10000;
111 
/* Every LOCK_* attribute flag that may be OR-ed into a mutex type. */
#define	ALL_ATTRIBUTES						\
	(LOCK_RECURSIVE | LOCK_ERRORCHECK | LOCK_ROBUST |	\
	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)
116 
117 /*
118  * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
119  * augmented by zero or more of the flags:
120  *	LOCK_RECURSIVE
121  *	LOCK_ERRORCHECK
122  *	LOCK_PRIO_INHERIT
123  *	LOCK_PRIO_PROTECT
124  *	LOCK_ROBUST
125  */
126 #pragma weak _mutex_init = mutex_init
127 /* ARGSUSED2 */
128 int
129 mutex_init(mutex_t *mp, int type, void *arg)
130 {
131 	int basetype = (type & ~ALL_ATTRIBUTES);
132 	const pcclass_t *pccp;
133 	int error = 0;
134 	int ceil;
135 
136 	if (basetype == USYNC_PROCESS_ROBUST) {
137 		/*
138 		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
139 		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
140 		 * retain the USYNC_PROCESS_ROBUST flag so we can return
141 		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
142 		 * mutexes will ever draw ELOCKUNMAPPED).
143 		 */
144 		type |= (USYNC_PROCESS | LOCK_ROBUST);
145 		basetype = USYNC_PROCESS;
146 	}
147 
148 	if (type & LOCK_PRIO_PROTECT)
149 		pccp = get_info_by_policy(SCHED_FIFO);
150 	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
151 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
152 	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
153 	    ((type & LOCK_PRIO_PROTECT) &&
154 	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
155 	    ceil > pccp->pcc_primax))) {
156 		error = EINVAL;
157 	} else if (type & LOCK_ROBUST) {
158 		/*
159 		 * Callers of mutex_init() with the LOCK_ROBUST attribute
160 		 * are required to pass an initially all-zero mutex.
161 		 * Multiple calls to mutex_init() are allowed; all but
162 		 * the first return EBUSY.  A call to mutex_init() is
163 		 * allowed to make an inconsistent robust lock consistent
164 		 * (for historical usage, even though the proper interface
165 		 * for this is mutex_consistent()).  Note that we use
166 		 * atomic_or_16() to set the LOCK_INITED flag so as
167 		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
168 		 */
169 		if (!(mp->mutex_flag & LOCK_INITED)) {
170 			mp->mutex_type = (uint8_t)type;
171 			atomic_or_16(&mp->mutex_flag, LOCK_INITED);
172 			mp->mutex_magic = MUTEX_MAGIC;
173 		} else if (type != mp->mutex_type ||
174 		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
175 			error = EINVAL;
176 		} else if (mutex_consistent(mp) != 0) {
177 			error = EBUSY;
178 		}
179 		/* register a process robust mutex with the kernel */
180 		if (basetype == USYNC_PROCESS)
181 			register_lock(mp);
182 	} else {
183 		(void) memset(mp, 0, sizeof (*mp));
184 		mp->mutex_type = (uint8_t)type;
185 		mp->mutex_flag = LOCK_INITED;
186 		mp->mutex_magic = MUTEX_MAGIC;
187 	}
188 
189 	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
190 		mp->mutex_ceiling = ceil;
191 	}
192 
193 	return (error);
194 }
195 
196 /*
197  * Delete mp from list of ceiling mutexes owned by curthread.
198  * Return 1 if the head of the chain was updated.
199  */
200 int
201 _ceil_mylist_del(mutex_t *mp)
202 {
203 	ulwp_t *self = curthread;
204 	mxchain_t **mcpp;
205 	mxchain_t *mcp;
206 
207 	for (mcpp = &self->ul_mxchain;
208 	    (mcp = *mcpp) != NULL;
209 	    mcpp = &mcp->mxchain_next) {
210 		if (mcp->mxchain_mx == mp) {
211 			*mcpp = mcp->mxchain_next;
212 			lfree(mcp, sizeof (*mcp));
213 			return (mcpp == &self->ul_mxchain);
214 		}
215 	}
216 	return (0);
217 }
218 
219 /*
220  * Add mp to the list of ceiling mutexes owned by curthread.
221  * Return ENOMEM if no memory could be allocated.
222  */
223 int
224 _ceil_mylist_add(mutex_t *mp)
225 {
226 	ulwp_t *self = curthread;
227 	mxchain_t *mcp;
228 
229 	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
230 		return (ENOMEM);
231 	mcp->mxchain_mx = mp;
232 	mcp->mxchain_next = self->ul_mxchain;
233 	self->ul_mxchain = mcp;
234 	return (0);
235 }
236 
237 /*
238  * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below.
239  */
240 static void
241 set_rt_priority(ulwp_t *self, int prio)
242 {
243 	pcparms_t pcparm;
244 
245 	pcparm.pc_cid = self->ul_rtclassid;
246 	((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
247 	((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
248 	(void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm);
249 }
250 
251 /*
252  * Inherit priority from ceiling.
253  * This changes the effective priority, not the assigned priority.
254  */
255 void
256 _ceil_prio_inherit(int prio)
257 {
258 	ulwp_t *self = curthread;
259 
260 	self->ul_epri = prio;
261 	set_rt_priority(self, prio);
262 }
263 
264 /*
265  * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
266  * if holding at least one ceiling lock.  If no ceiling locks are held at this
267  * point, disinherit completely, reverting back to assigned priority.
268  */
269 void
270 _ceil_prio_waive(void)
271 {
272 	ulwp_t *self = curthread;
273 	mxchain_t *mcp = self->ul_mxchain;
274 	int prio;
275 
276 	if (mcp == NULL) {
277 		prio = self->ul_pri;
278 		self->ul_epri = 0;
279 	} else {
280 		prio = mcp->mxchain_mx->mutex_ceiling;
281 		self->ul_epri = prio;
282 	}
283 	set_rt_priority(self, prio);
284 }
285 
286 /*
287  * Clear the lock byte.  Retain the waiters byte and the spinners byte.
288  * Return the old value of the lock word.
289  */
290 static uint32_t
291 clear_lockbyte(volatile uint32_t *lockword)
292 {
293 	uint32_t old;
294 	uint32_t new;
295 
296 	do {
297 		old = *lockword;
298 		new = old & ~LOCKMASK;
299 	} while (atomic_cas_32(lockword, old, new) != old);
300 
301 	return (old);
302 }
303 
304 /*
305  * Same as clear_lockbyte(), but operates on mutex_lockword64.
306  * The mutex_ownerpid field is cleared along with the lock byte.
307  */
308 static uint64_t
309 clear_lockbyte64(volatile uint64_t *lockword64)
310 {
311 	uint64_t old;
312 	uint64_t new;
313 
314 	do {
315 		old = *lockword64;
316 		new = old & ~LOCKMASK64;
317 	} while (atomic_cas_64(lockword64, old, new) != old);
318 
319 	return (old);
320 }
321 
322 /*
323  * Similar to set_lock_byte(), which only tries to set the lock byte.
324  * Here, we attempt to set the lock byte AND the mutex_ownerpid,
325  * keeping the remaining bytes constant.
326  */
327 static int
328 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
329 {
330 	uint64_t old;
331 	uint64_t new;
332 
333 	old = *lockword64 & ~LOCKMASK64;
334 	new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
335 	if (atomic_cas_64(lockword64, old, new) == old)
336 		return (LOCKCLEAR);
337 
338 	return (LOCKSET);
339 }
340 
341 /*
342  * Increment the spinners count in the mutex lock word.
343  * Return 0 on success.  Return -1 if the count would overflow.
344  */
345 static int
346 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
347 {
348 	uint32_t old;
349 	uint32_t new;
350 
351 	do {
352 		old = *lockword;
353 		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
354 			return (-1);
355 		new = old + (1 << SPINNERSHIFT);
356 	} while (atomic_cas_32(lockword, old, new) != old);
357 
358 	return (0);
359 }
360 
361 /*
362  * Decrement the spinners count in the mutex lock word.
363  * Return the new value of the lock word.
364  */
365 static uint32_t
366 spinners_decr(volatile uint32_t *lockword)
367 {
368 	uint32_t old;
369 	uint32_t new;
370 
371 	do {
372 		new = old = *lockword;
373 		if (new & SPINNERMASK)
374 			new -= (1 << SPINNERSHIFT);
375 	} while (atomic_cas_32(lockword, old, new) != old);
376 
377 	return (new);
378 }
379 
380 /*
381  * Non-preemptive spin locks.  Used by queue_lock().
382  * No lock statistics are gathered for these locks.
383  * No DTrace probes are provided for these locks.
384  */
385 void
386 spin_lock_set(mutex_t *mp)
387 {
388 	ulwp_t *self = curthread;
389 
390 	no_preempt(self);
391 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
392 		mp->mutex_owner = (uintptr_t)self;
393 		return;
394 	}
395 	/*
396 	 * Spin for a while, attempting to acquire the lock.
397 	 */
398 	INCR32(self->ul_spin_lock_spin);
399 	if (mutex_queuelock_adaptive(mp) == 0 ||
400 	    set_lock_byte(&mp->mutex_lockw) == 0) {
401 		mp->mutex_owner = (uintptr_t)self;
402 		return;
403 	}
404 	/*
405 	 * Try harder if we were previously at a no premption level.
406 	 */
407 	if (self->ul_preempt > 1) {
408 		INCR32(self->ul_spin_lock_spin2);
409 		if (mutex_queuelock_adaptive(mp) == 0 ||
410 		    set_lock_byte(&mp->mutex_lockw) == 0) {
411 			mp->mutex_owner = (uintptr_t)self;
412 			return;
413 		}
414 	}
415 	/*
416 	 * Give up and block in the kernel for the mutex.
417 	 */
418 	INCR32(self->ul_spin_lock_sleep);
419 	(void) ___lwp_mutex_timedlock(mp, NULL);
420 	mp->mutex_owner = (uintptr_t)self;
421 }
422 
423 void
424 spin_lock_clear(mutex_t *mp)
425 {
426 	ulwp_t *self = curthread;
427 
428 	mp->mutex_owner = 0;
429 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
430 		(void) ___lwp_mutex_wakeup(mp, 0);
431 		INCR32(self->ul_spin_lock_wakeup);
432 	}
433 	preempt(self);
434 }
435 
436 /*
437  * Allocate the sleep queue hash table.
438  */
439 void
440 queue_alloc(void)
441 {
442 	ulwp_t *self = curthread;
443 	uberdata_t *udp = self->ul_uberdata;
444 	queue_head_t *qp;
445 	void *data;
446 	int i;
447 
448 	/*
449 	 * No locks are needed; we call here only when single-threaded.
450 	 */
451 	ASSERT(self == udp->ulwp_one);
452 	ASSERT(!udp->uberflags.uf_mt);
453 	if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
454 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
455 	    == MAP_FAILED)
456 		thr_panic("cannot allocate thread queue_head table");
457 	udp->queue_head = qp = (queue_head_t *)data;
458 	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
459 		qp->qh_type = (i < QHASHSIZE)? MX : CV;
460 		qp->qh_lock.mutex_flag = LOCK_INITED;
461 		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
462 		qp->qh_hlist = &qp->qh_def_root;
463 #if defined(THREAD_DEBUG)
464 		qp->qh_hlen = 1;
465 		qp->qh_hmax = 1;
466 #endif
467 	}
468 }
469 
#if defined(THREAD_DEBUG)

/*
 * Debug-only consistency check of the queue head 'qp', which the
 * caller must hold locked.
 * The cheap pass checks the root list: head/tail pairing, length
 * against qh_hlen/qh_hmax, and the head's type against its position
 * in the table.  When thread_queue_verify is non-zero, a second pass
 * walks every queued thread and cross-checks its bookkeeping.
 */
void
QVERIFY(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_root_t *qrp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	uint_t index;
	uint32_t cnt;
	char qtype;
	void *wchan;

	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));

	/* pass 1: count the roots; each is all-empty or has head and tail */
	cnt = 0;
	qrp = qp->qh_hlist;
	while (qrp != NULL) {
		cnt++;
		ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) ||
		    (qrp->qr_head == NULL && qrp->qr_tail == NULL));
		qrp = qrp->qr_next;
	}
	ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt);
	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
	ASSERT(qp->qh_type == qtype);
	if (!thread_queue_verify)
		return;

	/* pass 2: real expensive stuff, only for _THREAD_QUEUE_VERIFY */
	cnt = 0;
	for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
		prev = NULL;
		ulwp = qrp->qr_head;
		while (ulwp != NULL) {
			cnt++;
			/* a writer may only follow another writer */
			if (ulwp->ul_writer)
				ASSERT(prev == NULL || prev->ul_writer);
			ASSERT(ulwp->ul_qtype == qtype);
			ASSERT(ulwp->ul_wchan != NULL);
			ASSERT(ulwp->ul_sleepq == qp);
			wchan = ulwp->ul_wchan;
			ASSERT(qrp->qr_wchan == wchan);
			index = QUEUE_HASH(wchan, qtype);
			ASSERT(&udp->queue_head[index] == qp);
			prev = ulwp;
			ulwp = ulwp->ul_link;
		}
		ASSERT(qrp->qr_tail == prev);
	}
	ASSERT(qp->qh_qlen == cnt);
}

#else	/* THREAD_DEBUG */

/* non-debug builds: queue verification compiles away entirely */
#define	QVERIFY(qp)

#endif	/* THREAD_DEBUG */
525 
526 /*
527  * Acquire a queue head.
528  */
529 queue_head_t *
530 queue_lock(void *wchan, int qtype)
531 {
532 	uberdata_t *udp = curthread->ul_uberdata;
533 	queue_head_t *qp;
534 	queue_root_t *qrp;
535 
536 	ASSERT(qtype == MX || qtype == CV);
537 
538 	/*
539 	 * It is possible that we could be called while still single-threaded.
540 	 * If so, we call queue_alloc() to allocate the queue_head[] array.
541 	 */
542 	if ((qp = udp->queue_head) == NULL) {
543 		queue_alloc();
544 		qp = udp->queue_head;
545 	}
546 	qp += QUEUE_HASH(wchan, qtype);
547 	spin_lock_set(&qp->qh_lock);
548 	for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next)
549 		if (qrp->qr_wchan == wchan)
550 			break;
551 	if (qrp == NULL && qp->qh_def_root.qr_head == NULL) {
552 		/* the default queue root is available; use it */
553 		qrp = &qp->qh_def_root;
554 		qrp->qr_wchan = wchan;
555 		ASSERT(qrp->qr_next == NULL);
556 		ASSERT(qrp->qr_tail == NULL &&
557 		    qrp->qr_rtcount == 0 && qrp->qr_qlen == 0);
558 	}
559 	qp->qh_wchan = wchan;	/* valid until queue_unlock() is called */
560 	qp->qh_root = qrp;	/* valid until queue_unlock() is called */
561 	INCR32(qp->qh_lockcount);
562 	QVERIFY(qp);
563 	return (qp);
564 }
565 
566 /*
567  * Release a queue head.
568  */
569 void
570 queue_unlock(queue_head_t *qp)
571 {
572 	QVERIFY(qp);
573 	spin_lock_clear(&qp->qh_lock);
574 }
575 
576 /*
577  * For rwlock queueing, we must queue writers ahead of readers of the
578  * same priority.  We do this by making writers appear to have a half
579  * point higher priority for purposes of priority comparisons below.
580  */
/* Queue-ordering key for thread 't': twice its real priority, plus ul_writer. */
#define	CMP_PRIO(t)	((real_priority(t) << 1) + (t)->ul_writer)
582 
583 void
584 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo)
585 {
586 	queue_root_t *qrp;
587 	ulwp_t **ulwpp;
588 	ulwp_t *next;
589 	int pri = CMP_PRIO(ulwp);
590 
591 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
592 	ASSERT(ulwp->ul_sleepq != qp);
593 
594 	if ((qrp = qp->qh_root) == NULL) {
595 		/* use the thread's queue root for the linkage */
596 		qrp = &ulwp->ul_queue_root;
597 		qrp->qr_next = qp->qh_hlist;
598 		qrp->qr_prev = NULL;
599 		qrp->qr_head = NULL;
600 		qrp->qr_tail = NULL;
601 		qrp->qr_wchan = qp->qh_wchan;
602 		qrp->qr_rtcount = 0;
603 		qrp->qr_qlen = 0;
604 		qrp->qr_qmax = 0;
605 		qp->qh_hlist->qr_prev = qrp;
606 		qp->qh_hlist = qrp;
607 		qp->qh_root = qrp;
608 		MAXINCR(qp->qh_hmax, qp->qh_hlen);
609 	}
610 
611 	/*
612 	 * LIFO queue ordering is unfair and can lead to starvation,
613 	 * but it gives better performance for heavily contended locks.
614 	 * We use thread_queue_fifo (range is 0..8) to determine
615 	 * the frequency of FIFO vs LIFO queuing:
616 	 *	0 : every 256th time	(almost always LIFO)
617 	 *	1 : every 128th time
618 	 *	2 : every 64th  time
619 	 *	3 : every 32nd  time
620 	 *	4 : every 16th  time	(the default value, mostly LIFO)
621 	 *	5 : every 8th   time
622 	 *	6 : every 4th   time
623 	 *	7 : every 2nd   time
624 	 *	8 : every time		(never LIFO, always FIFO)
625 	 * Note that there is always some degree of FIFO ordering.
626 	 * This breaks live lock conditions that occur in applications
627 	 * that are written assuming (incorrectly) that threads acquire
628 	 * locks fairly, that is, in roughly round-robin order.
629 	 * In any event, the queue is maintained in kernel priority order.
630 	 *
631 	 * If force_fifo is non-zero, fifo queueing is forced.
632 	 * SUSV3 requires this for semaphores.
633 	 */
634 	if (qrp->qr_head == NULL) {
635 		/*
636 		 * The queue is empty.  LIFO/FIFO doesn't matter.
637 		 */
638 		ASSERT(qrp->qr_tail == NULL);
639 		ulwpp = &qrp->qr_head;
640 	} else if (force_fifo |
641 	    (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) {
642 		/*
643 		 * Enqueue after the last thread whose priority is greater
644 		 * than or equal to the priority of the thread being queued.
645 		 * Attempt first to go directly onto the tail of the queue.
646 		 */
647 		if (pri <= CMP_PRIO(qrp->qr_tail))
648 			ulwpp = &qrp->qr_tail->ul_link;
649 		else {
650 			for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
651 			    ulwpp = &next->ul_link)
652 				if (pri > CMP_PRIO(next))
653 					break;
654 		}
655 	} else {
656 		/*
657 		 * Enqueue before the first thread whose priority is less
658 		 * than or equal to the priority of the thread being queued.
659 		 * Hopefully we can go directly onto the head of the queue.
660 		 */
661 		for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
662 		    ulwpp = &next->ul_link)
663 			if (pri >= CMP_PRIO(next))
664 				break;
665 	}
666 	if ((ulwp->ul_link = *ulwpp) == NULL)
667 		qrp->qr_tail = ulwp;
668 	*ulwpp = ulwp;
669 
670 	ulwp->ul_sleepq = qp;
671 	ulwp->ul_wchan = qp->qh_wchan;
672 	ulwp->ul_qtype = qp->qh_type;
673 	if ((ulwp->ul_schedctl != NULL &&
674 	    ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) |
675 	    ulwp->ul_pilocks) {
676 		ulwp->ul_rtqueued = 1;
677 		qrp->qr_rtcount++;
678 	}
679 	MAXINCR(qrp->qr_qmax, qrp->qr_qlen);
680 	MAXINCR(qp->qh_qmax, qp->qh_qlen);
681 }
682 
683 /*
684  * Helper function for queue_slot() and queue_slot_rt().
685  * Try to find a non-suspended thread on the queue.
686  */
687 static ulwp_t **
688 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt)
689 {
690 	ulwp_t *ulwp;
691 	ulwp_t **foundpp = NULL;
692 	int priority = -1;
693 	ulwp_t *prev;
694 	int tpri;
695 
696 	for (prev = NULL;
697 	    (ulwp = *ulwpp) != NULL;
698 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
699 		if (ulwp->ul_stop)	/* skip suspended threads */
700 			continue;
701 		tpri = rt? CMP_PRIO(ulwp) : 0;
702 		if (tpri > priority) {
703 			foundpp = ulwpp;
704 			*prevp = prev;
705 			priority = tpri;
706 			if (!rt)
707 				break;
708 		}
709 	}
710 	return (foundpp);
711 }
712 
713 /*
714  * For real-time, we search the entire queue because the dispatch
715  * (kernel) priorities may have changed since enqueueing.
716  */
717 static ulwp_t **
718 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp)
719 {
720 	ulwp_t **ulwpp = ulwpp_org;
721 	ulwp_t *ulwp = *ulwpp;
722 	ulwp_t **foundpp = ulwpp;
723 	int priority = CMP_PRIO(ulwp);
724 	ulwp_t *prev;
725 	int tpri;
726 
727 	for (prev = ulwp, ulwpp = &ulwp->ul_link;
728 	    (ulwp = *ulwpp) != NULL;
729 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
730 		tpri = CMP_PRIO(ulwp);
731 		if (tpri > priority) {
732 			foundpp = ulwpp;
733 			*prevp = prev;
734 			priority = tpri;
735 		}
736 	}
737 	ulwp = *foundpp;
738 
739 	/*
740 	 * Try not to return a suspended thread.
741 	 * This mimics the old libthread's behavior.
742 	 */
743 	if (ulwp->ul_stop &&
744 	    (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) {
745 		foundpp = ulwpp;
746 		ulwp = *foundpp;
747 	}
748 	ulwp->ul_rt = 1;
749 	return (foundpp);
750 }
751 
752 ulwp_t **
753 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more)
754 {
755 	queue_root_t *qrp;
756 	ulwp_t **ulwpp;
757 	ulwp_t *ulwp;
758 	int rt;
759 
760 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
761 
762 	if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) {
763 		*more = 0;
764 		return (NULL);		/* no lwps on the queue */
765 	}
766 	rt = (qrp->qr_rtcount != 0);
767 	*prevp = NULL;
768 	if (ulwp->ul_link == NULL) {	/* only one lwp on the queue */
769 		*more = 0;
770 		ulwp->ul_rt = rt;
771 		return (&qrp->qr_head);
772 	}
773 	*more = 1;
774 
775 	if (rt)		/* real-time queue */
776 		return (queue_slot_rt(&qrp->qr_head, prevp));
777 	/*
778 	 * Try not to return a suspended thread.
779 	 * This mimics the old libthread's behavior.
780 	 */
781 	if (ulwp->ul_stop &&
782 	    (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) {
783 		ulwp = *ulwpp;
784 		ulwp->ul_rt = 0;
785 		return (ulwpp);
786 	}
787 	/*
788 	 * The common case; just pick the first thread on the queue.
789 	 */
790 	ulwp->ul_rt = 0;
791 	return (&qrp->qr_head);
792 }
793 
794 /*
795  * Common code for unlinking an lwp from a user-level sleep queue.
796  */
797 void
798 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
799 {
800 	queue_root_t *qrp = qp->qh_root;
801 	queue_root_t *nqrp;
802 	ulwp_t *ulwp = *ulwpp;
803 	ulwp_t *next;
804 
805 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
806 	ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan);
807 
808 	DECR(qp->qh_qlen);
809 	DECR(qrp->qr_qlen);
810 	if (ulwp->ul_rtqueued) {
811 		ulwp->ul_rtqueued = 0;
812 		qrp->qr_rtcount--;
813 	}
814 	next = ulwp->ul_link;
815 	*ulwpp = next;
816 	ulwp->ul_link = NULL;
817 	if (qrp->qr_tail == ulwp)
818 		qrp->qr_tail = prev;
819 	if (qrp == &ulwp->ul_queue_root) {
820 		/*
821 		 * We can't continue to use the unlinked thread's
822 		 * queue root for the linkage.
823 		 */
824 		queue_root_t *qr_next = qrp->qr_next;
825 		queue_root_t *qr_prev = qrp->qr_prev;
826 
827 		if (qrp->qr_tail) {
828 			/* switch to using the last thread's queue root */
829 			ASSERT(qrp->qr_qlen != 0);
830 			nqrp = &qrp->qr_tail->ul_queue_root;
831 			*nqrp = *qrp;
832 			if (qr_next)
833 				qr_next->qr_prev = nqrp;
834 			if (qr_prev)
835 				qr_prev->qr_next = nqrp;
836 			else
837 				qp->qh_hlist = nqrp;
838 			qp->qh_root = nqrp;
839 		} else {
840 			/* empty queue root; just delete from the hash list */
841 			ASSERT(qrp->qr_qlen == 0);
842 			if (qr_next)
843 				qr_next->qr_prev = qr_prev;
844 			if (qr_prev)
845 				qr_prev->qr_next = qr_next;
846 			else
847 				qp->qh_hlist = qr_next;
848 			qp->qh_root = NULL;
849 			DECR(qp->qh_hlen);
850 		}
851 	}
852 }
853 
854 ulwp_t *
855 dequeue(queue_head_t *qp, int *more)
856 {
857 	ulwp_t **ulwpp;
858 	ulwp_t *ulwp;
859 	ulwp_t *prev;
860 
861 	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
862 		return (NULL);
863 	ulwp = *ulwpp;
864 	queue_unlink(qp, ulwpp, prev);
865 	ulwp->ul_sleepq = NULL;
866 	ulwp->ul_wchan = NULL;
867 	return (ulwp);
868 }
869 
870 /*
871  * Return a pointer to the highest priority thread sleeping on wchan.
872  */
873 ulwp_t *
874 queue_waiter(queue_head_t *qp)
875 {
876 	ulwp_t **ulwpp;
877 	ulwp_t *prev;
878 	int more;
879 
880 	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
881 		return (NULL);
882 	return (*ulwpp);
883 }
884 
885 int
886 dequeue_self(queue_head_t *qp)
887 {
888 	ulwp_t *self = curthread;
889 	queue_root_t *qrp;
890 	ulwp_t **ulwpp;
891 	ulwp_t *ulwp;
892 	ulwp_t *prev;
893 	int found = 0;
894 
895 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
896 
897 	/* find self on the sleep queue */
898 	if ((qrp = qp->qh_root) != NULL) {
899 		for (prev = NULL, ulwpp = &qrp->qr_head;
900 		    (ulwp = *ulwpp) != NULL;
901 		    prev = ulwp, ulwpp = &ulwp->ul_link) {
902 			if (ulwp == self) {
903 				queue_unlink(qp, ulwpp, prev);
904 				self->ul_cvmutex = NULL;
905 				self->ul_sleepq = NULL;
906 				self->ul_wchan = NULL;
907 				found = 1;
908 				break;
909 			}
910 		}
911 	}
912 
913 	if (!found)
914 		thr_panic("dequeue_self(): curthread not found on queue");
915 
916 	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
917 }
918 
919 /*
920  * Called from call_user_handler() and _thrp_suspend() to take
921  * ourself off of our sleep queue so we can grab locks.
922  */
923 void
924 unsleep_self(void)
925 {
926 	ulwp_t *self = curthread;
927 	queue_head_t *qp;
928 
929 	/*
930 	 * Calling enter_critical()/exit_critical() here would lead
931 	 * to recursion.  Just manipulate self->ul_critical directly.
932 	 */
933 	self->ul_critical++;
934 	while (self->ul_sleepq != NULL) {
935 		qp = queue_lock(self->ul_wchan, self->ul_qtype);
936 		/*
937 		 * We may have been moved from a CV queue to a
938 		 * mutex queue while we were attempting queue_lock().
939 		 * If so, just loop around and try again.
940 		 * dequeue_self() clears self->ul_sleepq.
941 		 */
942 		if (qp == self->ul_sleepq)
943 			(void) dequeue_self(qp);
944 		queue_unlock(qp);
945 	}
946 	self->ul_writer = 0;
947 	self->ul_critical--;
948 }
949 
950 /*
951  * Common code for calling the ___lwp_mutex_timedlock() system call.
952  * Returns with mutex_owner and mutex_ownerpid set correctly.
953  */
954 static int
955 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
956 {
957 	ulwp_t *self = curthread;
958 	uberdata_t *udp = self->ul_uberdata;
959 	int mtype = mp->mutex_type;
960 	hrtime_t begin_sleep;
961 	int acquired;
962 	int error;
963 
964 	self->ul_sp = stkptr();
965 	self->ul_wchan = mp;
966 	if (__td_event_report(self, TD_SLEEP, udp)) {
967 		self->ul_td_evbuf.eventnum = TD_SLEEP;
968 		self->ul_td_evbuf.eventdata = mp;
969 		tdb_event(TD_SLEEP, udp);
970 	}
971 	if (msp) {
972 		tdb_incr(msp->mutex_sleep);
973 		begin_sleep = gethrtime();
974 	}
975 
976 	DTRACE_PROBE1(plockstat, mutex__block, mp);
977 
978 	for (;;) {
979 		/*
980 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
981 		 * means we successfully acquired the lock.
982 		 */
983 		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
984 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
985 			acquired = 0;
986 			break;
987 		}
988 
989 		if (mtype & USYNC_PROCESS) {
990 			/*
991 			 * Defend against forkall().  We may be the child,
992 			 * in which case we don't actually own the mutex.
993 			 */
994 			enter_critical(self);
995 			if (mp->mutex_ownerpid == udp->pid) {
996 				mp->mutex_owner = (uintptr_t)self;
997 				exit_critical(self);
998 				acquired = 1;
999 				break;
1000 			}
1001 			exit_critical(self);
1002 		} else {
1003 			mp->mutex_owner = (uintptr_t)self;
1004 			acquired = 1;
1005 			break;
1006 		}
1007 	}
1008 	if (msp)
1009 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
1010 	self->ul_wchan = NULL;
1011 	self->ul_sp = 0;
1012 
1013 	if (acquired) {
1014 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
1015 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1016 	} else {
1017 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
1018 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1019 	}
1020 
1021 	return (error);
1022 }
1023 
1024 /*
1025  * Common code for calling the ___lwp_mutex_trylock() system call.
1026  * Returns with mutex_owner and mutex_ownerpid set correctly.
1027  */
1028 int
1029 mutex_trylock_kernel(mutex_t *mp)
1030 {
1031 	ulwp_t *self = curthread;
1032 	uberdata_t *udp = self->ul_uberdata;
1033 	int mtype = mp->mutex_type;
1034 	int error;
1035 	int acquired;
1036 
1037 	for (;;) {
1038 		/*
1039 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
1040 		 * means we successfully acquired the lock.
1041 		 */
1042 		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
1043 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
1044 			acquired = 0;
1045 			break;
1046 		}
1047 
1048 		if (mtype & USYNC_PROCESS) {
1049 			/*
1050 			 * Defend against forkall().  We may be the child,
1051 			 * in which case we don't actually own the mutex.
1052 			 */
1053 			enter_critical(self);
1054 			if (mp->mutex_ownerpid == udp->pid) {
1055 				mp->mutex_owner = (uintptr_t)self;
1056 				exit_critical(self);
1057 				acquired = 1;
1058 				break;
1059 			}
1060 			exit_critical(self);
1061 		} else {
1062 			mp->mutex_owner = (uintptr_t)self;
1063 			acquired = 1;
1064 			break;
1065 		}
1066 	}
1067 
1068 	if (acquired) {
1069 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1070 	} else if (error != EBUSY) {
1071 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1072 	}
1073 
1074 	return (error);
1075 }
1076 
1077 volatile sc_shared_t *
1078 setup_schedctl(void)
1079 {
1080 	ulwp_t *self = curthread;
1081 	volatile sc_shared_t *scp;
1082 	sc_shared_t *tmp;
1083 
1084 	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
1085 	    !self->ul_vfork &&			/* not a child of vfork() */
1086 	    !self->ul_schedctl_called) {	/* haven't been called before */
1087 		enter_critical(self);
1088 		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
1089 		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
1090 			self->ul_schedctl = scp = tmp;
1091 		exit_critical(self);
1092 	}
1093 	/*
1094 	 * Unless the call to setup_schedctl() is surrounded
1095 	 * by enter_critical()/exit_critical(), the address
1096 	 * we are returning could be invalid due to a forkall()
1097 	 * having occurred in another thread.
1098 	 */
1099 	return (scp);
1100 }
1101 
1102 /*
1103  * Interfaces from libsched, incorporated into libc.
1104  * libsched.so.1 is now a filter library onto libc.
1105  */
1106 #pragma weak schedctl_lookup = schedctl_init
1107 schedctl_t *
1108 schedctl_init(void)
1109 {
1110 	volatile sc_shared_t *scp = setup_schedctl();
1111 	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
1112 }
1113 
/*
 * Does nothing; the function body is intentionally empty.
 */
void
schedctl_exit(void)
{
	/* nothing to do */
}
1118 
1119 /*
1120  * Contract private interface for java.
1121  * Set up the schedctl data if it doesn't exist yet.
1122  * Return a pointer to the pointer to the schedctl data.
1123  */
1124 volatile sc_shared_t *volatile *
1125 _thr_schedctl(void)
1126 {
1127 	ulwp_t *self = curthread;
1128 	volatile sc_shared_t *volatile *ptr;
1129 
1130 	if (self->ul_vfork)
1131 		return (NULL);
1132 	if (*(ptr = &self->ul_schedctl) == NULL)
1133 		(void) setup_schedctl();
1134 	return (ptr);
1135 }
1136 
1137 /*
1138  * Block signals and attempt to block preemption.
1139  * no_preempt()/preempt() must be used in pairs but can be nested.
1140  */
1141 void
1142 no_preempt(ulwp_t *self)
1143 {
1144 	volatile sc_shared_t *scp;
1145 
1146 	if (self->ul_preempt++ == 0) {
1147 		enter_critical(self);
1148 		if ((scp = self->ul_schedctl) != NULL ||
1149 		    (scp = setup_schedctl()) != NULL) {
1150 			/*
1151 			 * Save the pre-existing preempt value.
1152 			 */
1153 			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
1154 			scp->sc_preemptctl.sc_nopreempt = 1;
1155 		}
1156 	}
1157 }
1158 
1159 /*
1160  * Undo the effects of no_preempt().
1161  */
1162 void
1163 preempt(ulwp_t *self)
1164 {
1165 	volatile sc_shared_t *scp;
1166 
1167 	ASSERT(self->ul_preempt > 0);
1168 	if (--self->ul_preempt == 0) {
1169 		if ((scp = self->ul_schedctl) != NULL) {
1170 			/*
1171 			 * Restore the pre-existing preempt value.
1172 			 */
1173 			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
1174 			if (scp->sc_preemptctl.sc_yield &&
1175 			    scp->sc_preemptctl.sc_nopreempt == 0) {
1176 				yield();
1177 				if (scp->sc_preemptctl.sc_yield) {
1178 					/*
1179 					 * Shouldn't happen.  This is either
1180 					 * a race condition or the thread
1181 					 * just entered the real-time class.
1182 					 */
1183 					yield();
1184 					scp->sc_preemptctl.sc_yield = 0;
1185 				}
1186 			}
1187 		}
1188 		exit_critical(self);
1189 	}
1190 }
1191 
1192 /*
1193  * If a call to preempt() would cause the current thread to yield or to
1194  * take deferred actions in exit_critical(), then unpark the specified
1195  * lwp so it can run while we delay.  Return the original lwpid if the
1196  * unpark was not performed, else return zero.  The tests are a repeat
1197  * of some of the tests in preempt(), above.  This is a statistical
1198  * optimization solely for cond_sleep_queue(), below.
1199  */
1200 static lwpid_t
1201 preempt_unpark(ulwp_t *self, lwpid_t lwpid)
1202 {
1203 	volatile sc_shared_t *scp = self->ul_schedctl;
1204 
1205 	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
1206 	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
1207 	    (self->ul_curplease && self->ul_critical == 1)) {
1208 		(void) __lwp_unpark(lwpid);
1209 		lwpid = 0;
1210 	}
1211 	return (lwpid);
1212 }
1213 
/*
 * Try to acquire a mutex that is not USYNC_PROCESS (asserted below).
 * Spin for a while (if 'tryhard' is true), trying to grab the lock.
 *
 *	mp	the mutex to acquire
 *	tryhard	if zero, make only a single attempt and do not spin
 *
 * If this fails, return EBUSY and let the caller deal with it.
 * EBUSY is also returned, with no probes fired, if the calling
 * thread is already the owner of the mutex.
 * If this succeeds, return 0 with mutex_owner set to curthread.
 * Returns ENOTRECOVERABLE if LOCK_NOTRECOVERABLE is set in mutex_flag
 * and EOWNERDEAD if the lock was acquired with LOCK_OWNERDEAD set.
 */
static int
mutex_trylock_adaptive(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	int error = EBUSY;
	ulwp_t *ulwp;
	volatile sc_shared_t *scp;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
	uint32_t new_lockword;
	int count = 0;		/* spin iterations; reported via mutex__spun */
	int max_count;
	uint8_t max_spinners;

	ASSERT(!(mp->mutex_type & USYNC_PROCESS));

	/* we already own the mutex; we cannot acquire it again here */
	if (MUTEX_OWNER(mp) == self)
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	if (set_lock_byte(lockp) == 0) {
		*ownerp = (uintptr_t)self;
		error = 0;
		goto done;
	}
	if (!tryhard)
		goto done;
	/* ncpus is filled in lazily, the first time it is found to be zero */
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	/* cap the number of spinners at one less than the number of cpus */
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	/* no spinners allowed (or a zero spin count) means no spinning */
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This spin loop is unfair to lwps that have already dropped into
	 * the kernel to sleep.  They will starve on a highly-contended mutex.
	 * This is just too bad.  The adaptive spin algorithm is intended
	 * to allow programs with highly-contended locks (that is, broken
	 * programs) to execute with reasonable speed despite their contention.
	 * Being fair would reduce the speed of such programs and well-written
	 * programs will not suffer in any case.
	 */
	enter_critical(self);
	/* give up if spinners_incr() refuses to admit another spinner */
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		/* read the lock byte first; attempt the set only if it is 0 */
		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
		/*
		 * Stop spinning if the mutex owner is not running on
		 * a processor; it will not drop the lock any time soon
		 * and we would just be wasting time to keep spinning.
		 *
		 * Note that we are looking at another thread (ulwp_t)
		 * without ensuring that the other thread does not exit.
		 * The scheme relies on ulwp_t structures never being
		 * deallocated by the library (the library employs a free
		 * list of ulwp_t structs that are reused when new threads
		 * are created) and on schedctl shared memory never being
		 * deallocated once created via __schedctl().
		 *
		 * Thus, the worst that can happen when the spinning thread
		 * looks at the owner's schedctl data is that it is looking
		 * at some other thread's schedctl data.  This almost never
		 * happens and is benign when it does.
		 */
		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
		    ((scp = ulwp->ul_schedctl) == NULL ||
		    scp->sc_state != SC_ONPROC))
			break;
	}
	/* we are no longer spinning; examine the lock word that results */
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_queue()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_queue(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
		 */
		if (set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
		}
		/* the final attempt is counted as one more iteration */
		count++;
	}
	exit_critical(self);

done:
	/* definitive check: made after the lock has actually been acquired */
	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been waked up.
		 */
		mp->mutex_owner = 0;
		(void) clear_lockbyte(&mp->mutex_lockword);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		/* count is nonzero only if the spin loop was entered */
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
		}
		/* EBUSY is an expected outcome and is not reported as an error */
		if (error != EBUSY) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
		}
	} else {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
		}
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
		/* the lock is held, but report that LOCK_OWNERDEAD is set */
		if (mp->mutex_flag & LOCK_OWNERDEAD) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			error = EOWNERDEAD;
		}
	}

	return (error);
}
1368 
1369 /*
1370  * Same as mutex_trylock_adaptive(), except specifically for queue locks.
1371  * The owner field is not set here; the caller (spin_lock_set()) sets it.
1372  */
1373 static int
1374 mutex_queuelock_adaptive(mutex_t *mp)
1375 {
1376 	ulwp_t *ulwp;
1377 	volatile sc_shared_t *scp;
1378 	volatile uint8_t *lockp;
1379 	volatile uint64_t *ownerp;
1380 	int count = curthread->ul_queue_spin;
1381 
1382 	ASSERT(mp->mutex_type == USYNC_THREAD);
1383 
1384 	if (count == 0)
1385 		return (EBUSY);
1386 
1387 	lockp = (volatile uint8_t *)&mp->mutex_lockw;
1388 	ownerp = (volatile uint64_t *)&mp->mutex_owner;
1389 	while (--count >= 0) {
1390 		if (*lockp == 0 && set_lock_byte(lockp) == 0)
1391 			return (0);
1392 		SMT_PAUSE();
1393 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1394 		    ((scp = ulwp->ul_schedctl) == NULL ||
1395 		    scp->sc_state != SC_ONPROC))
1396 			break;
1397 	}
1398 
1399 	return (EBUSY);
1400 }
1401 
/*
 * Like mutex_trylock_adaptive(), but for process-shared mutexes.
 * Spin for a while (if 'tryhard' is true), trying to grab the lock.
 *
 *	mp	the mutex to acquire; must be USYNC_PROCESS (asserted)
 *	tryhard	if zero, make only a single attempt and do not spin
 *
 * If this fails, return EBUSY and let the caller deal with it.
 * EBUSY is also returned, with no probes fired, if shared_mutex_held()
 * reports that the mutex is already held.
 * If this succeeds, return 0 with mutex_owner set to curthread
 * and mutex_ownerpid set to the current pid.
 * Returns ENOTRECOVERABLE if LOCK_NOTRECOVERABLE is set in mutex_flag.
 * Returns EOWNERDEAD or ELOCKUNMAPPED if the lock was acquired with
 * LOCK_OWNERDEAD or LOCK_UNMAPPED set in mutex_flag.
 */
static int
mutex_trylock_process(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int error = EBUSY;
	volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64;
	uint32_t new_lockword;
	int count = 0;		/* spin iterations; reported via mutex__spun */
	int max_count;
	uint8_t max_spinners;

	ASSERT(mp->mutex_type & USYNC_PROCESS);

	if (shared_mutex_held(mp))
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	enter_critical(self);
	if (set_lock_byte64(lockp, udp->pid) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		/* mp->mutex_ownerpid was set by set_lock_byte64() */
		exit_critical(self);
		error = 0;
		goto done;
	}
	exit_critical(self);
	if (!tryhard)
		goto done;
	/* ncpus is filled in lazily, the first time it is found to be zero */
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	/* cap the number of spinners at one less than the number of cpus */
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	/* no spinners allowed (or a zero spin count) means no spinning */
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This is a process-shared mutex.
	 * We cannot know if the owner is running on a processor.
	 * We just spin and hope that it is on a processor.
	 */
	enter_critical(self);
	/* give up if spinners_incr() refuses to admit another spinner */
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		/* read the lock bits first; attempt the set only if clear */
		if ((*lockp & LOCKMASK64) == 0 &&
		    set_lock_byte64(lockp, udp->pid) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			/* mp->mutex_ownerpid was set by set_lock_byte64() */
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
	}
	/* we are no longer spinning; examine the lock word that results */
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_kernel()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_kernel(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
		 */
		if (set_lock_byte64(lockp, udp->pid) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			/* mp->mutex_ownerpid was set by set_lock_byte64() */
			error = 0;
		}
		/* the final attempt is counted as one more iteration */
		count++;
	}
	exit_critical(self);

done:
	/* definitive check: made after the lock has actually been acquired */
	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been waked up.
		 */
		mp->mutex_owner = 0;
		/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
		(void) clear_lockbyte64(&mp->mutex_lockword64);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		/* count is nonzero only if the spin loop was entered */
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
		}
		/* EBUSY is an expected outcome and is not reported as an error */
		if (error != EBUSY) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
		}
	} else {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
		}
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
		/*
		 * The lock is held, but tell the caller about the flags.
		 * LOCK_OWNERDEAD takes precedence over LOCK_UNMAPPED, and
		 * ELOCKUNMAPPED is reported only for USYNC_PROCESS_ROBUST
		 * mutexes; all other cases report EOWNERDEAD.
		 */
		if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			if (mp->mutex_flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (mp->mutex_type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	}

	return (error);
}
1543 
1544 /*
1545  * Mutex wakeup code for releasing a USYNC_THREAD mutex.
1546  * Returns the lwpid of the thread that was dequeued, if any.
1547  * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
1548  * to wake up the specified lwp.
1549  */
1550 static lwpid_t
1551 mutex_wakeup(mutex_t *mp)
1552 {
1553 	lwpid_t lwpid = 0;
1554 	int more;
1555 	queue_head_t *qp;
1556 	ulwp_t *ulwp;
1557 
1558 	/*
1559 	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
1560 	 * waiters bit if no one was found on the queue because the mutex
1561 	 * might have been deallocated or reallocated for another purpose.
1562 	 */
1563 	qp = queue_lock(mp, MX);
1564 	if ((ulwp = dequeue(qp, &more)) != NULL) {
1565 		lwpid = ulwp->ul_lwpid;
1566 		mp->mutex_waiters = more;
1567 	}
1568 	queue_unlock(qp);
1569 	return (lwpid);
1570 }
1571 
1572 /*
1573  * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex.
1574  */
1575 static void
1576 mutex_wakeup_all(mutex_t *mp)
1577 {
1578 	queue_head_t *qp;
1579 	queue_root_t *qrp;
1580 	int nlwpid = 0;
1581 	int maxlwps = MAXLWPS;
1582 	ulwp_t *ulwp;
1583 	lwpid_t buffer[MAXLWPS];
1584 	lwpid_t *lwpid = buffer;
1585 
1586 	/*
1587 	 * Walk the list of waiters and prepare to wake up all of them.
1588 	 * The waiters flag has already been cleared from the mutex.
1589 	 *
1590 	 * We keep track of lwpids that are to be unparked in lwpid[].
1591 	 * __lwp_unpark_all() is called to unpark all of them after
1592 	 * they have been removed from the sleep queue and the sleep
1593 	 * queue lock has been dropped.  If we run out of space in our
1594 	 * on-stack buffer, we need to allocate more but we can't call
1595 	 * lmalloc() because we are holding a queue lock when the overflow
1596 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
1597 	 * either because the application may have allocated a small
1598 	 * stack and we don't want to overrun the stack.  So we call
1599 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
1600 	 * system call directly since that path acquires no locks.
1601 	 */
1602 	qp = queue_lock(mp, MX);
1603 	for (;;) {
1604 		if ((qrp = qp->qh_root) == NULL ||
1605 		    (ulwp = qrp->qr_head) == NULL)
1606 			break;
1607 		ASSERT(ulwp->ul_wchan == mp);
1608 		queue_unlink(qp, &qrp->qr_head, NULL);
1609 		ulwp->ul_sleepq = NULL;
1610 		ulwp->ul_wchan = NULL;
1611 		if (nlwpid == maxlwps)
1612 			lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
1613 		lwpid[nlwpid++] = ulwp->ul_lwpid;
1614 	}
1615 
1616 	if (nlwpid == 0) {
1617 		queue_unlock(qp);
1618 	} else {
1619 		mp->mutex_waiters = 0;
1620 		no_preempt(curthread);
1621 		queue_unlock(qp);
1622 		if (nlwpid == 1)
1623 			(void) __lwp_unpark(lwpid[0]);
1624 		else
1625 			(void) __lwp_unpark_all(lwpid, nlwpid);
1626 		preempt(curthread);
1627 	}
1628 
1629 	if (lwpid != buffer)
1630 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
1631 }
1632 
1633 /*
1634  * Release a process-private mutex.
1635  * As an optimization, if there are waiters but there are also spinners
1636  * attempting to acquire the mutex, then don't bother waking up a waiter;
1637  * one of the spinners will acquire the mutex soon and it would be a waste
1638  * of resources to wake up some thread just to have it spin for a while
1639  * and then possibly go back to sleep.  See mutex_trylock_adaptive().
1640  */
1641 static lwpid_t
1642 mutex_unlock_queue(mutex_t *mp, int release_all)
1643 {
1644 	lwpid_t lwpid = 0;
1645 	uint32_t old_lockword;
1646 
1647 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1648 	mp->mutex_owner = 0;
1649 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
1650 	if ((old_lockword & WAITERMASK) &&
1651 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
1652 		ulwp_t *self = curthread;
1653 		no_preempt(self);	/* ensure a prompt wakeup */
1654 		if (release_all)
1655 			mutex_wakeup_all(mp);
1656 		else
1657 			lwpid = mutex_wakeup(mp);
1658 		if (lwpid == 0)
1659 			preempt(self);
1660 	}
1661 	return (lwpid);
1662 }
1663 
1664 /*
1665  * Like mutex_unlock_queue(), but for process-shared mutexes.
1666  */
1667 static void
1668 mutex_unlock_process(mutex_t *mp, int release_all)
1669 {
1670 	uint64_t old_lockword64;
1671 
1672 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1673 	mp->mutex_owner = 0;
1674 	/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
1675 	old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64);
1676 	if ((old_lockword64 & WAITERMASK64) &&
1677 	    (release_all || (old_lockword64 & SPINNERMASK64) == 0)) {
1678 		ulwp_t *self = curthread;
1679 		no_preempt(self);	/* ensure a prompt wakeup */
1680 		(void) ___lwp_mutex_wakeup(mp, release_all);
1681 		preempt(self);
1682 	}
1683 }
1684 
1685 void
1686 stall(void)
1687 {
1688 	for (;;)
1689 		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
1690 }
1691 
/*
 * Acquire a USYNC_THREAD mutex via user-level sleep queues.
 * We failed set_lock_byte(&mp->mutex_lockw) before coming here.
 *
 *	self	the calling thread
 *	msp	lock statistics to update, or NULL for none
 *	mp	the mutex to acquire
 *	tsp	timeout handed to __lwp_park()
 *
 * If successful, returns with mutex_owner set correctly.
 * Returns 0 on success, or EOWNERDEAD if the lock was acquired with
 * LOCK_OWNERDEAD set.  Returns ENOTRECOVERABLE if LOCK_NOTRECOVERABLE
 * is found set after acquisition.  Otherwise returns the error from
 * __lwp_park(), which is asserted to be EINVAL or ETIME (EINTR is
 * absorbed here and the wait is retried).
 */
int
mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
	timespec_t *tsp)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;
	hrtime_t begin_sleep;	/* set and used only when msp != NULL */
	int error = 0;

	self->ul_sp = stkptr();
	/* report a TD_SLEEP event if event reporting asks for it */
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_wchan = mp;
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	/*
	 * Put ourself on the sleep queue, and while we are
	 * unable to grab the lock, go park in the kernel.
	 * Take ourself off the sleep queue after we acquire the lock.
	 * The waiter bit can be set/cleared only while holding the queue lock.
	 */
	qp = queue_lock(mp, MX);
	enqueue(qp, self, 0);
	mp->mutex_waiters = 1;
	for (;;) {
		/* we are on the sleep queue and hold the queue lock here */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			mp->mutex_waiters = dequeue_self(qp);
			break;
		}
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/*
		 * __lwp_park() will return the residual time in tsp
		 * if we are unparked before the timeout expires.
		 */
		error = __lwp_park(tsp, 0);
		set_parking_flag(self, 0);
		/*
		 * We could have taken a signal or suspended ourself.
		 * If we did, then we removed ourself from the queue.
		 * Someone else may have removed us from the queue
		 * as a consequence of mutex_unlock().  We may have
		 * gotten a timeout from __lwp_park().  Or we may still
		 * be on the queue and this is just a spurious wakeup.
		 */
		qp = queue_lock(mp, MX);
		if (self->ul_sleepq == NULL) {
			/* we are no longer on the sleep queue */
			if (error) {
				mp->mutex_waiters = queue_waiter(qp)? 1 : 0;
				/* any error other than EINTR ends the wait */
				if (error != EINTR)
					break;
				error = 0;
			}
			if (set_lock_byte(&mp->mutex_lockw) == 0) {
				mp->mutex_owner = (uintptr_t)self;
				break;
			}
			/* the lock is still held; get back on the queue */
			enqueue(qp, self, 0);
			mp->mutex_waiters = 1;
		}
		ASSERT(self->ul_sleepq == qp &&
		    self->ul_qtype == MX &&
		    self->ul_wchan == mp);
		if (error) {
			/* still queued: dequeue before leaving with an error */
			if (error != EINTR) {
				mp->mutex_waiters = dequeue_self(qp);
				break;
			}
			error = 0;
		}
	}
	/* every exit from the loop leaves us off the sleep queue */
	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
	    self->ul_wchan == NULL);
	self->ul_sp = 0;
	queue_unlock(qp);

	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;

	ASSERT(error == 0 || error == EINVAL || error == ETIME);

	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been waked up.
		 */
		mp->mutex_owner = 0;
		(void) clear_lockbyte(&mp->mutex_lockword);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	} else {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		/* the lock is held, but report that LOCK_OWNERDEAD is set */
		if (mp->mutex_flag & LOCK_OWNERDEAD) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			error = EOWNERDEAD;
		}
	}

	return (error);
}
1812 
1813 static int
1814 mutex_recursion(mutex_t *mp, int mtype, int try)
1815 {
1816 	ASSERT(mutex_held(mp));
1817 	ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK));
1818 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1819 
1820 	if (mtype & LOCK_RECURSIVE) {
1821 		if (mp->mutex_rcount == RECURSION_MAX) {
1822 			DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN);
1823 			return (EAGAIN);
1824 		}
1825 		mp->mutex_rcount++;
1826 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
1827 		return (0);
1828 	}
1829 	if (try == MUTEX_LOCK) {
1830 		DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1831 		return (EDEADLK);
1832 	}
1833 	return (EBUSY);
1834 }
1835 
/*
 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so
 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary.
 * We use tdb_hash_lock here and in the synch object tracking code in
 * the tdb_agent.c file.  There is no conflict between these two usages.
 *
 *	mp	the mutex to register
 *
 * Registered mutexes are remembered in the udp->robustlocks hash
 * table, which is allocated on first use.  A mutex that is already
 * in the table is not registered again.  Nothing is returned; the
 * result of ___lwp_mutex_register() is ignored.
 */
void
register_lock(mutex_t *mp)
{
	uberdata_t *udp = curthread->ul_uberdata;
	uint_t hash = LOCK_HASH(mp);
	robust_t *rlp;
	robust_t **rlpp;
	robust_t **table;

	/*
	 * Allocate the table if it does not exist yet.  The pointer is
	 * tested again with tdb_hash_lock held so that only one thread
	 * performs the allocation.
	 */
	if ((table = udp->robustlocks) == NULL) {
		lmutex_lock(&udp->tdb_hash_lock);
		if ((table = udp->robustlocks) == NULL) {
			table = lmalloc(LOCKHASHSZ * sizeof (robust_t *));
			/* barrier is issued before the table is published */
			membar_producer();
			udp->robustlocks = table;
		}
		lmutex_unlock(&udp->tdb_hash_lock);
	}
	membar_consumer();

	/*
	 * First search the registered table with no locks held.
	 * This is safe because the table never shrinks
	 * and we can only get a false negative.
	 */
	for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) {
		if (rlp->robust_lock == mp)	/* already registered */
			return;
	}

	/*
	 * The lock was not found.
	 * Repeat the operation with tdb_hash_lock held.
	 */
	lmutex_lock(&udp->tdb_hash_lock);

	/* on loop exit, rlpp addresses the NULL link that ends the chain */
	for (rlpp = &table[hash];
	    (rlp = *rlpp) != NULL;
	    rlpp = &rlp->robust_next) {
		if (rlp->robust_lock == mp) {	/* already registered */
			lmutex_unlock(&udp->tdb_hash_lock);
			return;
		}
	}

	/*
	 * The lock has never been registered.
	 * Register it now and add it to the table.
	 */
	(void) ___lwp_mutex_register(mp);
	/*
	 * NOTE(review): robust_next is never assigned here, and the
	 * table slots are never cleared explicitly; presumably lmalloc()
	 * returns zeroed memory -- TODO confirm.
	 */
	rlp = lmalloc(sizeof (*rlp));
	rlp->robust_lock = mp;
	/* barrier is issued before the entry is linked for lock-free readers */
	membar_producer();
	*rlpp = rlp;

	lmutex_unlock(&udp->tdb_hash_lock);
}
1899 
1900 /*
1901  * This is called in the child of fork()/forkall() to start over
1902  * with a clean slate.  (Each process must register its own locks.)
1903  * No locks are needed because all other threads are suspended or gone.
1904  */
1905 void
1906 unregister_locks(void)
1907 {
1908 	uberdata_t *udp = curthread->ul_uberdata;
1909 	uint_t hash;
1910 	robust_t **table;
1911 	robust_t *rlp;
1912 	robust_t *next;
1913 
1914 	if ((table = udp->robustlocks) != NULL) {
1915 		for (hash = 0; hash < LOCKHASHSZ; hash++) {
1916 			rlp = table[hash];
1917 			while (rlp != NULL) {
1918 				next = rlp->robust_next;
1919 				lfree(rlp, sizeof (*rlp));
1920 				rlp = next;
1921 			}
1922 		}
1923 		lfree(table, LOCKHASHSZ * sizeof (robust_t *));
1924 		udp->robustlocks = NULL;
1925 	}
1926 }
1927 
/*
 * The general ("long way") mutex acquisition routine, handling
 * every mutex type.
 *
 *	mp	the mutex to acquire
 *	tsp	timeout, passed on to mutex_lock_kernel() or
 *		mutex_lock_queue(); NULL is passed by untimed callers
 *	try	MUTEX_TRY or MUTEX_LOCK, optionally or'ed with
 *		MUTEX_NOCEIL to skip the LOCK_PRIO_PROTECT processing
 *
 * Returns with mutex_owner set correctly.
 * Returns 0, EOWNERDEAD or ELOCKUNMAPPED when the lock is acquired,
 * else an error such as EBUSY, EPERM, EINVAL, or ENOTRECOVERABLE.
 */
int
mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
	int error = 0;
	int noceil = try & MUTEX_NOCEIL;
	/*
	 * ceil and myprio are assigned only on the LOCK_PRIO_PROTECT
	 * path below (with noceil == 0) and are read again only under
	 * that same condition in the failure case at the bottom.
	 */
	uint8_t ceil;
	int myprio;

	/* strip the modifier so 'try' is exactly MUTEX_TRY or MUTEX_LOCK */
	try &= ~MUTEX_NOCEIL;
	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);

	if (!self->ul_schedctl_called)
		(void) setup_schedctl();

	if (msp && try == MUTEX_TRY)
		tdb_incr(msp->mutex_try);

	/* relocking a recursive or error-checking mutex that we hold */
	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_held(mp))
		return (mutex_recursion(mp, mtype, try));

	/* untimed mutex_lock() of a mutex we hold, with error detection on */
	if (self->ul_error_detection && try == MUTEX_LOCK &&
	    tsp == NULL && mutex_held(mp))
		lock_error(mp, "mutex_lock", NULL, NULL);

	if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
		update_sched(self);
		/* EPERM unless our class id matches ul_rtclassid */
		if (self->ul_cid != self->ul_rtclassid) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM);
			return (EPERM);
		}
		ceil = mp->mutex_ceiling;
		myprio = self->ul_epri? self->ul_epri : self->ul_pri;
		/* our priority must not exceed the mutex's ceiling */
		if (myprio > ceil) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
			return (EINVAL);
		}
		if ((error = _ceil_mylist_add(mp)) != 0) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
			return (error);
		}
		if (myprio < ceil)
			_ceil_prio_inherit(ceil);
	}

	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
	    == (USYNC_PROCESS | LOCK_ROBUST))
		register_lock(mp);

	if (mtype & LOCK_PRIO_INHERIT) {
		/* go straight to the kernel */
		if (try == MUTEX_TRY)
			error = mutex_trylock_kernel(mp);
		else	/* MUTEX_LOCK */
			error = mutex_lock_kernel(mp, tsp, msp);
		/*
		 * The kernel never sets or clears the lock byte
		 * for LOCK_PRIO_INHERIT mutexes.
		 * Set it here for consistency.
		 */
		switch (error) {
		case 0:
			self->ul_pilocks++;
			mp->mutex_lockw = LOCKSET;
			break;
		case EOWNERDEAD:
		case ELOCKUNMAPPED:
			/* the lock is held in these cases, too */
			self->ul_pilocks++;
			mp->mutex_lockw = LOCKSET;
			/* FALLTHROUGH */
		case ENOTRECOVERABLE:
			ASSERT(mtype & LOCK_ROBUST);
			break;
		case EDEADLK:
			/*
			 * stall() loops forever and does not return, so
			 * only MUTEX_TRY gets past it to report EBUSY.
			 * NOTE(review): a timed lock (tsp != NULL) also
			 * stalls here rather than timing out -- confirm
			 * that this is intended.
			 */
			if (try == MUTEX_LOCK)
				stall();
			error = EBUSY;
			break;
		}
	} else if (mtype & USYNC_PROCESS) {
		/* spin only for MUTEX_LOCK; then block in the kernel if busy */
		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_kernel(mp, tsp, msp);
	} else {	/* USYNC_THREAD */
		/* spin only for MUTEX_LOCK; then use the sleep queue if busy */
		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_queue(self, msp, mp, tsp);
	}

	switch (error) {
	case 0:
	case EOWNERDEAD:
	case ELOCKUNMAPPED:
		/* the lock was acquired */
		if (mtype & LOCK_ROBUST)
			remember_lock(mp);
		if (msp)
			record_begin_hold(msp);
		break;
	default:
		/* the lock was not acquired; undo the ceiling processing */
		if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
			(void) _ceil_mylist_del(mp);
			if (myprio < ceil)
				_ceil_prio_waive();
		}
		if (try == MUTEX_TRY) {
			if (msp)
				tdb_incr(msp->mutex_try_fail);
			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
				tdb_event(TD_LOCK_TRY, udp);
			}
		}
		break;
	}

	return (error);
}
2051 
2052 int
2053 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
2054 {
2055 	ulwp_t *self = curthread;
2056 	uberdata_t *udp = self->ul_uberdata;
2057 
2058 	/*
2059 	 * We know that USYNC_PROCESS is set in mtype and that
2060 	 * zero, one, or both of the flags LOCK_RECURSIVE and
2061 	 * LOCK_ERRORCHECK are set, and that no other flags are set.
2062 	 */
2063 	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
2064 	enter_critical(self);
2065 	if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) {
2066 		mp->mutex_owner = (uintptr_t)self;
2067 		/* mp->mutex_ownerpid was set by set_lock_byte64() */
2068 		exit_critical(self);
2069 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2070 		return (0);
2071 	}
2072 	exit_critical(self);
2073 
2074 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
2075 		return (mutex_recursion(mp, mtype, try));
2076 
2077 	if (try == MUTEX_LOCK) {
2078 		if (mutex_trylock_process(mp, 1) == 0)
2079 			return (0);
2080 		return (mutex_lock_kernel(mp, tsp, NULL));
2081 	}
2082 
2083 	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
2084 		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
2085 		tdb_event(TD_LOCK_TRY, udp);
2086 	}
2087 	return (EBUSY);
2088 }
2089 
/*
 * Common implementation of the blocking mutex lock operations in this
 * file: mutex_lock(), pthread_mutex_timedlock(),
 * pthread_mutex_reltimedlock_np(), and the mutex reacquisition done by
 * cond_wait_queue() and cond_wait_kernel().
 *
 *	mp	the mutex to acquire.
 *	tsp	NULL for an untimed lock; otherwise a timeout that is
 *		handed unchanged to fast_process_lock(),
 *		mutex_lock_queue() or mutex_lock_internal().
 *
 * Returns 0 when the lock is acquired on one of the inline paths,
 * EDEADLK for the single-threaded self-deadlock case described below,
 * and otherwise whatever the helper that was called returns.
 */
static int
mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    self->ul_uberdata->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		/*
		 * The lock is already held.  A non-zero mtype here means
		 * LOCK_RECURSIVE and/or LOCK_ERRORCHECK is set; if we are
		 * the owner, let mutex_recursion() decide the outcome.
		 */
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/*
		 * We have reached a deadlock, probably because the
		 * process is executing non-async-signal-safe code in
		 * a signal handler and is attempting to acquire a lock
		 * that it already owns.  This is not surprising, given
		 * bad programming practices over the years that have
		 * resulted in applications calling printf() and such
		 * in their signal handlers.  Unless the user has told
		 * us that the signal handlers are safe by setting:
		 *	export _THREAD_ASYNC_SAFE=1
		 * we return EDEADLK rather than actually deadlocking.
		 * Note that this is done only for an untimed lock
		 * (tsp == NULL).
		 */
		if (tsp == NULL &&
		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
			return (EDEADLK);
		}
		/*
		 * None of the cases above returned;
		 * fall through to the paths below.
		 */
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (gflags->uf_trs_ted |
	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
		if (mtype & USYNC_PROCESS)
			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
		/* USYNC_THREAD: try to grab the lock byte atomically */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/*
		 * A zero return from mutex_trylock_adaptive() is reported
		 * to our caller as success; on a non-zero return we hand
		 * off to mutex_lock_queue(), passing along the timeout.
		 */
		if (mutex_trylock_adaptive(mp, 1) != 0)
			return (mutex_lock_queue(self, NULL, mp, tsp));
		return (0);
	}

	/* else do it the long way */
	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
}
2161 
2162 #pragma weak pthread_mutex_lock = mutex_lock
2163 #pragma weak _mutex_lock = mutex_lock
2164 int
2165 mutex_lock(mutex_t *mp)
2166 {
2167 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2168 	return (mutex_lock_impl(mp, NULL));
2169 }
2170 
2171 int
2172 pthread_mutex_timedlock(pthread_mutex_t *_RESTRICT_KYWD mp,
2173 	const struct timespec *_RESTRICT_KYWD abstime)
2174 {
2175 	timespec_t tslocal;
2176 	int error;
2177 
2178 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2179 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
2180 	error = mutex_lock_impl((mutex_t *)mp, &tslocal);
2181 	if (error == ETIME)
2182 		error = ETIMEDOUT;
2183 	return (error);
2184 }
2185 
2186 int
2187 pthread_mutex_reltimedlock_np(pthread_mutex_t *_RESTRICT_KYWD mp,
2188 	const struct timespec *_RESTRICT_KYWD reltime)
2189 {
2190 	timespec_t tslocal;
2191 	int error;
2192 
2193 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2194 	tslocal = *reltime;
2195 	error = mutex_lock_impl((mutex_t *)mp, &tslocal);
2196 	if (error == ETIME)
2197 		error = ETIMEDOUT;
2198 	return (error);
2199 }
2200 
/*
 * Attempt to acquire 'mp' without blocking.  Also exported under the
 * weak name pthread_mutex_trylock().
 *
 * Returns 0 if the lock was acquired on one of the inline paths and
 * EBUSY if an inline path found it held by someone else.  For a held
 * LOCK_RECURSIVE/LOCK_ERRORCHECK mutex that we own, the result of
 * mutex_recursion() is returned.  USYNC_PROCESS mutexes on the second
 * path and everything on the long path return whatever
 * fast_process_lock() or mutex_lock_internal() returns.
 */
#pragma weak pthread_mutex_trylock = mutex_trylock
int
mutex_trylock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		/*
		 * Already held.  A non-zero mtype here means LOCK_RECURSIVE
		 * and/or LOCK_ERRORCHECK is set; if we are the owner,
		 * mutex_recursion() decides the outcome.
		 */
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_TRY));
		/* no TD_LOCK_TRY event is reported on this path */
		return (EBUSY);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (gflags->uf_trs_ted |
	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
		if (mtype & USYNC_PROCESS)
			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
		/* USYNC_THREAD: a single atomic attempt, no spinning */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_TRY));
		/* report the failed attempt if TD_LOCK_TRY is enabled */
		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
			tdb_event(TD_LOCK_TRY, udp);
		}
		return (EBUSY);
	}

	/* else do it the long way */
	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
}
2262 
/*
 * The general-purpose mutex release.  In this file it is called by
 * mutex_unlock() when none of its fast paths apply and by
 * heldlock_exit() when abandoning robust locks.
 *
 *	mp			the mutex to release.
 *	retain_robust_flags	if zero, and the mutex is not
 *				LOCK_PRIO_INHERIT, a mutex still marked
 *				LOCK_OWNERDEAD or LOCK_UNMAPPED is changed
 *				to LOCK_NOTRECOVERABLE before release;
 *				if non-zero those flags are left alone.
 *
 * Returns EPERM for a LOCK_ERRORCHECK mutex that the caller does not
 * hold, the result of ___lwp_mutex_unlock() for a LOCK_PRIO_INHERIT
 * mutex, and 0 in all other cases.
 */
int
mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp;
	int error = 0;
	int release_all;
	lwpid_t lwpid;

	if ((mtype & LOCK_ERRORCHECK) && !mutex_held(mp))
		return (EPERM);

	/* with error detection enabled, report unlocking a lock not held */
	if (self->ul_error_detection && !mutex_held(mp))
		lock_error(mp, "mutex_unlock", NULL, NULL);

	/*
	 * A recursively held mutex just drops one level of recursion;
	 * the lock itself stays held.
	 */
	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
		mp->mutex_rcount--;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
		return (0);
	}

	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
		(void) record_hold_time(msp);

	/*
	 * The lock is being released while still marked LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED: mark it LOCK_NOTRECOVERABLE instead.
	 */
	if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) &&
	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
	}
	/* passed to mutex_unlock_process() / mutex_unlock_queue() below */
	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);

	if (mtype & LOCK_PRIO_INHERIT) {
		no_preempt(self);
		mp->mutex_owner = 0;
		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		mp->mutex_lockw = LOCKCLEAR;
		self->ul_pilocks--;
		error = ___lwp_mutex_unlock(mp);
		preempt(self);
	} else if (mtype & USYNC_PROCESS) {
		mutex_unlock_process(mp, release_all);
	} else {	/* USYNC_THREAD */
		/* a non-zero lwpid identifies an lwp that must be unparked */
		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}

	/* remove a robust mutex from this thread's list of held locks */
	if (mtype & LOCK_ROBUST)
		forget_lock(mp);

	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
		_ceil_prio_waive();

	return (error);
}
2323 
/*
 * Release 'mp'.  Also exported under the weak names
 * pthread_mutex_unlock() and _mutex_unlock().
 *
 * The fast paths below handle mutexes whose type contains nothing but
 * USYNC_PROCESS, LOCK_RECURSIVE and LOCK_ERRORCHECK.  On those paths
 * ownership is verified only when LOCK_ERRORCHECK is set.
 *
 * Returns 0 from the fast paths, EPERM when a LOCK_ERRORCHECK mutex
 * is not owned by the caller, and otherwise the result of
 * mutex_unlock_internal().
 */
#pragma weak pthread_mutex_unlock = mutex_unlock
#pragma weak _mutex_unlock = mutex_unlock
int
mutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;
	lwpid_t lwpid;
	short el;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    self->ul_uberdata->uberflags.uf_all) == 0) {
		if (mtype) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
		}
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 * Also, there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		return (0);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL) {
		/* plain USYNC_THREAD mutex with uf_trs_ted clear */
		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
			/*
			 * Also reached by goto from the LOCK_RECURSIVE /
			 * LOCK_ERRORCHECK USYNC_THREAD case below.
			 */
fast_unlock:
			if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
				(void) __lwp_unpark(lwpid);
				preempt(self);
			}
			return (0);
		}
		if (el)		/* error detection or lock statistics */
			goto slow_unlock;
		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			goto fast_unlock;
		}
		if ((mtype &
		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that zero, one, or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
			 * that the USYNC_PROCESS flag is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			mutex_unlock_process(mp, 0);
			return (0);
		}
	}

	/* else do it the long way */
slow_unlock:
	return (mutex_unlock_internal(mp, 0));
}
2420 
/*
 * Internally to the library, almost all mutex lock/unlock actions
 * go through these lmutex_ functions, to protect critical regions.
 * We replicate a bit of code from mutex_lock() and mutex_unlock()
 * to make these functions faster since we know that the mutex type
 * of all internal locks is USYNC_THREAD.  We also know that internal
 * locking can never fail, so we panic if it does.
 *
 * lmutex_lock() calls enter_critical() before taking the lock and
 * returns without calling exit_critical(); the matching
 * exit_critical() is made by lmutex_unlock().
 */
void
lmutex_lock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	enter_critical(self);
	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists; the mutex must be free.
		 */
		ASSERT(mp->mutex_lockw == 0);
		mp->mutex_lockw = LOCKSET;
		mp->mutex_owner = (uintptr_t)self;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);

		if (!self->ul_schedctl_called)
			(void) setup_schedctl();

		/*
		 * Try the lock byte first, then mutex_trylock_adaptive(),
		 * and finally fall back to mutex_lock_queue() with no
		 * timeout.  The return value of mutex_lock_queue() is
		 * deliberately ignored here.
		 */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		} else if (mutex_trylock_adaptive(mp, 1) != 0) {
			(void) mutex_lock_queue(self, msp, mp, NULL);
		}

		if (msp)
			record_begin_hold(msp);
	}
}
2467 
/*
 * Release a libc-internal USYNC_THREAD mutex taken with lmutex_lock()
 * and then call exit_critical(), balancing the enter_critical() that
 * lmutex_lock() made.
 */
void
lmutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists so there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
		lwpid_t lwpid;

		if (msp)
			(void) record_hold_time(msp);
		/* a non-zero lwpid identifies an lwp that must be unparked */
		if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}
	exit_critical(self);
}
2500 
2501 /*
2502  * For specialized code in libc, like the asynchronous i/o code,
2503  * the following sig_*() locking primitives are used in order
2504  * to make the code asynchronous signal safe.  Signals are
2505  * deferred while locks acquired by these functions are held.
2506  */
2507 void
2508 sig_mutex_lock(mutex_t *mp)
2509 {
2510 	sigoff(curthread);
2511 	(void) mutex_lock(mp);
2512 }
2513 
2514 void
2515 sig_mutex_unlock(mutex_t *mp)
2516 {
2517 	(void) mutex_unlock(mp);
2518 	sigon(curthread);
2519 }
2520 
2521 int
2522 sig_mutex_trylock(mutex_t *mp)
2523 {
2524 	int error;
2525 
2526 	sigoff(curthread);
2527 	if ((error = mutex_trylock(mp)) != 0)
2528 		sigon(curthread);
2529 	return (error);
2530 }
2531 
2532 /*
2533  * sig_cond_wait() is a cancellation point.
2534  */
2535 int
2536 sig_cond_wait(cond_t *cv, mutex_t *mp)
2537 {
2538 	int error;
2539 
2540 	ASSERT(curthread->ul_sigdefer != 0);
2541 	pthread_testcancel();
2542 	error = __cond_wait(cv, mp);
2543 	if (error == EINTR && curthread->ul_cursig) {
2544 		sig_mutex_unlock(mp);
2545 		/* take the deferred signal here */
2546 		sig_mutex_lock(mp);
2547 	}
2548 	pthread_testcancel();
2549 	return (error);
2550 }
2551 
2552 /*
2553  * sig_cond_reltimedwait() is a cancellation point.
2554  */
2555 int
2556 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
2557 {
2558 	int error;
2559 
2560 	ASSERT(curthread->ul_sigdefer != 0);
2561 	pthread_testcancel();
2562 	error = __cond_reltimedwait(cv, mp, ts);
2563 	if (error == EINTR && curthread->ul_cursig) {
2564 		sig_mutex_unlock(mp);
2565 		/* take the deferred signal here */
2566 		sig_mutex_lock(mp);
2567 	}
2568 	pthread_testcancel();
2569 	return (error);
2570 }
2571 
2572 /*
2573  * For specialized code in libc, like the stdio code.
2574  * the following cancel_safe_*() locking primitives are used in
2575  * order to make the code cancellation-safe.  Cancellation is
2576  * deferred while locks acquired by these functions are held.
2577  */
2578 void
2579 cancel_safe_mutex_lock(mutex_t *mp)
2580 {
2581 	(void) mutex_lock(mp);
2582 	curthread->ul_libc_locks++;
2583 }
2584 
2585 int
2586 cancel_safe_mutex_trylock(mutex_t *mp)
2587 {
2588 	int error;
2589 
2590 	if ((error = mutex_trylock(mp)) == 0)
2591 		curthread->ul_libc_locks++;
2592 	return (error);
2593 }
2594 
2595 void
2596 cancel_safe_mutex_unlock(mutex_t *mp)
2597 {
2598 	ulwp_t *self = curthread;
2599 
2600 	ASSERT(self->ul_libc_locks != 0);
2601 
2602 	(void) mutex_unlock(mp);
2603 
2604 	/*
2605 	 * Decrement the count of locks held by cancel_safe_mutex_lock().
2606 	 * If we are then in a position to terminate cleanly and
2607 	 * if there is a pending cancellation and cancellation
2608 	 * is not disabled and we received EINTR from a recent
2609 	 * system call then perform the cancellation action now.
2610 	 */
2611 	if (--self->ul_libc_locks == 0 &&
2612 	    !(self->ul_vfork | self->ul_nocancel |
2613 	    self->ul_critical | self->ul_sigdefer) &&
2614 	    cancel_active())
2615 		pthread_exit(PTHREAD_CANCELED);
2616 }
2617 
/*
 * Ownership test for a process-shared mutex.
 * Returns non-zero if MUTEX_OWNED() reports that the calling thread
 * owns the mutex and mutex_ownerpid equals this process's pid
 * (udp->pid); returns zero otherwise.
 */
static int
shared_mutex_held(mutex_t *mparg)
{
	/*
	 * The 'volatile' is necessary to make sure the compiler doesn't
	 * reorder the tests of the various components of the mutex.
	 * They must be tested in this order:
	 *	mutex_lockw
	 *	mutex_owner
	 *	mutex_ownerpid
	 * This relies on the fact that everywhere mutex_lockw is cleared,
	 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw
	 * is cleared, and that everywhere mutex_lockw is set, mutex_owner
	 * and mutex_ownerpid are set after mutex_lockw is set, and that
	 * mutex_lockw is set or cleared with a memory barrier.
	 */
	volatile mutex_t *mp = (volatile mutex_t *)mparg;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	/* the pid is compared only after the MUTEX_OWNED() test succeeds */
	return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid);
}
2640 
2641 #pragma weak _mutex_held = mutex_held
2642 int
2643 mutex_held(mutex_t *mparg)
2644 {
2645 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
2646 
2647 	if (mparg->mutex_type & USYNC_PROCESS)
2648 		return (shared_mutex_held(mparg));
2649 	return (MUTEX_OWNED(mp, curthread));
2650 }
2651 
2652 #pragma weak pthread_mutex_destroy = mutex_destroy
2653 #pragma weak _mutex_destroy = mutex_destroy
2654 int
2655 mutex_destroy(mutex_t *mp)
2656 {
2657 	if (mp->mutex_type & USYNC_PROCESS)
2658 		forget_lock(mp);
2659 	(void) memset(mp, 0, sizeof (*mp));
2660 	tdb_sync_obj_deregister(mp);
2661 	return (0);
2662 }
2663 
2664 #pragma weak pthread_mutex_consistent_np = mutex_consistent
2665 int
2666 mutex_consistent(mutex_t *mp)
2667 {
2668 	/*
2669 	 * Do this only for an inconsistent, initialized robust lock
2670 	 * that we hold.  For all other cases, return EINVAL.
2671 	 */
2672 	if (mutex_held(mp) &&
2673 	    (mp->mutex_type & LOCK_ROBUST) &&
2674 	    (mp->mutex_flag & LOCK_INITED) &&
2675 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
2676 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2677 		mp->mutex_rcount = 0;
2678 		return (0);
2679 	}
2680 	return (EINVAL);
2681 }
2682 
2683 /*
2684  * Spin locks are separate from ordinary mutexes,
2685  * but we use the same data structure for them.
2686  */
2687 
2688 int
2689 pthread_spin_init(pthread_spinlock_t *lock, int pshared)
2690 {
2691 	mutex_t *mp = (mutex_t *)lock;
2692 
2693 	(void) memset(mp, 0, sizeof (*mp));
2694 	if (pshared == PTHREAD_PROCESS_SHARED)
2695 		mp->mutex_type = USYNC_PROCESS;
2696 	else
2697 		mp->mutex_type = USYNC_THREAD;
2698 	mp->mutex_flag = LOCK_INITED;
2699 	mp->mutex_magic = MUTEX_MAGIC;
2700 	return (0);
2701 }
2702 
2703 int
2704 pthread_spin_destroy(pthread_spinlock_t *lock)
2705 {
2706 	(void) memset(lock, 0, sizeof (*lock));
2707 	return (0);
2708 }
2709 
2710 int
2711 pthread_spin_trylock(pthread_spinlock_t *lock)
2712 {
2713 	mutex_t *mp = (mutex_t *)lock;
2714 	ulwp_t *self = curthread;
2715 	int error = 0;
2716 
2717 	no_preempt(self);
2718 	if (set_lock_byte(&mp->mutex_lockw) != 0)
2719 		error = EBUSY;
2720 	else {
2721 		mp->mutex_owner = (uintptr_t)self;
2722 		if (mp->mutex_type == USYNC_PROCESS)
2723 			mp->mutex_ownerpid = self->ul_uberdata->pid;
2724 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2725 	}
2726 	preempt(self);
2727 	return (error);
2728 }
2729 
/*
 * Take a spin lock, spinning for as long as it takes.
 *
 * 'count' is the number of loop iterations that did not obtain the
 * lock; it stops incrementing at INT_MAX and is reported through the
 * mutex__spun and mutex__acquire probes.  Always returns 0.
 */
int
pthread_spin_lock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	int count = 0;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	DTRACE_PROBE1(plockstat, mutex__spin, mp);

	/*
	 * We don't care whether the owner is running on a processor.
	 * We just spin because that's what this interface requires.
	 */
	for (;;) {
		/*
		 * Do a plain read first and attempt the atomic
		 * set_lock_byte() only when the byte looks clear.
		 */
		if (*lockp == 0) {	/* lock byte appears to be clear */
			no_preempt(self);
			/*
			 * On success we leave the loop with no_preempt()
			 * still in effect; the matching preempt() is
			 * called below, after the owner has been recorded.
			 */
			if (set_lock_byte(lockp) == 0)
				break;
			preempt(self);
		}
		if (count < INT_MAX)
			count++;
		SMT_PAUSE();
	}
	mp->mutex_owner = (uintptr_t)self;
	/* the owner pid is recorded only for a process-shared lock */
	if (mp->mutex_type == USYNC_PROCESS)
		mp->mutex_ownerpid = self->ul_uberdata->pid;
	preempt(self);
	if (count) {
		DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
	}
	DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
	return (0);
}
2767 
/*
 * Release a spin lock.  No check is made that the caller is the
 * owner.  Always returns 0.
 */
int
pthread_spin_unlock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;

	no_preempt(self);
	/*
	 * Clear the owner fields before the lock word itself is cleared;
	 * shared_mutex_held() relies on this ordering.
	 */
	mp->mutex_owner = 0;
	mp->mutex_ownerpid = 0;
	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	/* clear the whole lock word atomically; the old value is unused */
	(void) atomic_swap_32(&mp->mutex_lockword, 0);
	preempt(self);
	return (0);
}
2782 
2783 #define	INITIAL_LOCKS	8	/* initial size of ul_heldlocks.array */
2784 
2785 /*
2786  * Find/allocate an entry for 'lock' in our array of held locks.
2787  */
2788 static mutex_t **
2789 find_lock_entry(mutex_t *lock)
2790 {
2791 	ulwp_t *self = curthread;
2792 	mutex_t **remembered = NULL;
2793 	mutex_t **lockptr;
2794 	uint_t nlocks;
2795 
2796 	if ((nlocks = self->ul_heldlockcnt) != 0)
2797 		lockptr = self->ul_heldlocks.array;
2798 	else {
2799 		nlocks = 1;
2800 		lockptr = &self->ul_heldlocks.single;
2801 	}
2802 
2803 	for (; nlocks; nlocks--, lockptr++) {
2804 		if (*lockptr == lock)
2805 			return (lockptr);
2806 		if (*lockptr == NULL && remembered == NULL)
2807 			remembered = lockptr;
2808 	}
2809 	if (remembered != NULL) {
2810 		*remembered = lock;
2811 		return (remembered);
2812 	}
2813 
2814 	/*
2815 	 * No entry available.  Allocate more space, converting
2816 	 * the single entry into an array of entries if necessary.
2817 	 */
2818 	if ((nlocks = self->ul_heldlockcnt) == 0) {
2819 		/*
2820 		 * Initial allocation of the array.
2821 		 * Convert the single entry into an array.
2822 		 */
2823 		self->ul_heldlockcnt = nlocks = INITIAL_LOCKS;
2824 		lockptr = lmalloc(nlocks * sizeof (mutex_t *));
2825 		/*
2826 		 * The single entry becomes the first entry in the array.
2827 		 */
2828 		*lockptr = self->ul_heldlocks.single;
2829 		self->ul_heldlocks.array = lockptr;
2830 		/*
2831 		 * Return the next available entry in the array.
2832 		 */
2833 		*++lockptr = lock;
2834 		return (lockptr);
2835 	}
2836 	/*
2837 	 * Reallocate the array, double the size each time.
2838 	 */
2839 	lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *));
2840 	(void) memcpy(lockptr, self->ul_heldlocks.array,
2841 	    nlocks * sizeof (mutex_t *));
2842 	lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2843 	self->ul_heldlocks.array = lockptr;
2844 	self->ul_heldlockcnt *= 2;
2845 	/*
2846 	 * Return the next available entry in the newly allocated array.
2847 	 */
2848 	*(lockptr += nlocks) = lock;
2849 	return (lockptr);
2850 }
2851 
/*
 * Insert 'lock' into our list of held locks.
 * Currently only used for LOCK_ROBUST mutexes.
 *
 * find_lock_entry() does the insertion as a side effect of the
 * lookup, so the slot pointer it returns is not needed here.
 */
void
remember_lock(mutex_t *lock)
{
	(void) find_lock_entry(lock);
}
2861 
/*
 * Remove 'lock' from our list of held locks.
 * Currently only used for LOCK_ROBUST mutexes.
 *
 * find_lock_entry() always returns a slot holding 'lock' (it creates
 * one if the lock was not recorded), and that slot is then cleared.
 */
void
forget_lock(mutex_t *lock)
{
	*find_lock_entry(lock) = NULL;
}
2871 
2872 /*
2873  * Free the array of held locks.
2874  */
2875 void
2876 heldlock_free(ulwp_t *ulwp)
2877 {
2878 	uint_t nlocks;
2879 
2880 	if ((nlocks = ulwp->ul_heldlockcnt) != 0)
2881 		lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2882 	ulwp->ul_heldlockcnt = 0;
2883 	ulwp->ul_heldlocks.array = NULL;
2884 }
2885 
2886 /*
2887  * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD.
2888  * Called from _thrp_exit() to deal with abandoned locks.
2889  */
2890 void
2891 heldlock_exit(void)
2892 {
2893 	ulwp_t *self = curthread;
2894 	mutex_t **lockptr;
2895 	uint_t nlocks;
2896 	mutex_t *mp;
2897 
2898 	if ((nlocks = self->ul_heldlockcnt) != 0)
2899 		lockptr = self->ul_heldlocks.array;
2900 	else {
2901 		nlocks = 1;
2902 		lockptr = &self->ul_heldlocks.single;
2903 	}
2904 
2905 	for (; nlocks; nlocks--, lockptr++) {
2906 		/*
2907 		 * The kernel takes care of transitioning held
2908 		 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD.
2909 		 * We avoid that case here.
2910 		 */
2911 		if ((mp = *lockptr) != NULL &&
2912 		    mutex_held(mp) &&
2913 		    (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) ==
2914 		    LOCK_ROBUST) {
2915 			mp->mutex_rcount = 0;
2916 			if (!(mp->mutex_flag & LOCK_UNMAPPED))
2917 				mp->mutex_flag |= LOCK_OWNERDEAD;
2918 			(void) mutex_unlock_internal(mp, 1);
2919 		}
2920 	}
2921 
2922 	heldlock_free(self);
2923 }
2924 
2925 #pragma weak _cond_init = cond_init
2926 /* ARGSUSED2 */
2927 int
2928 cond_init(cond_t *cvp, int type, void *arg)
2929 {
2930 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2931 		return (EINVAL);
2932 	(void) memset(cvp, 0, sizeof (*cvp));
2933 	cvp->cond_type = (uint16_t)type;
2934 	cvp->cond_magic = COND_MAGIC;
2935 	return (0);
2936 }
2937 
/*
 * cond_sleep_queue(): utility function for cond_wait_queue().
 *
 * Go to sleep on a condvar sleep queue, expect to be waked up
 * by someone calling cond_signal() or cond_broadcast() or due
 * to receiving a UNIX signal or being cancelled, or just simply
 * due to a spurious wakeup (like someone calling forkall()).
 *
 * The associated mutex is *not* reacquired before returning.
 * That must be done by the caller of cond_sleep_queue().
 *
 *	cvp	the condition variable to wait on.
 *	mp	the associated mutex; it is released here.
 *	tsp	NULL for an untimed wait, else the timeout passed
 *		to __lwp_park().
 *
 * Returns the value from the last call to __lwp_park().
 */
static int
cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;
	queue_head_t *mqp;	/* assigned and used only when !cv_wake */
	lwpid_t lwpid;
	int signalled;
	int error;
	int cv_wake;
	int release_all;

	/*
	 * Put ourself on the CV sleep queue, unlock the mutex, then
	 * park ourself and unpark a candidate lwp to grab the mutex.
	 * We must go onto the CV sleep queue before dropping the
	 * mutex in order to guarantee atomicity of the operation.
	 */
	self->ul_sp = stkptr();
	qp = queue_lock(cvp, CV);
	enqueue(qp, self, 0);
	cvp->cond_waiters_user = 1;
	self->ul_cvmutex = mp;
	/* cv_wake is set for a timed wait and clear for an untimed one */
	self->ul_cv_wake = cv_wake = (tsp != NULL);
	self->ul_signalled = 0;
	/*
	 * The mutex is being dropped while still marked LOCK_OWNERDEAD:
	 * mark it LOCK_NOTRECOVERABLE instead.
	 */
	if (mp->mutex_flag & LOCK_OWNERDEAD) {
		mp->mutex_flag &= ~LOCK_OWNERDEAD;
		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
	}
	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
	lwpid = mutex_unlock_queue(mp, release_all);
	for (;;) {
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if (lwpid != 0) {
			lwpid = preempt_unpark(self, lwpid);
			preempt(self);
		}
		/*
		 * We may have a deferred signal present,
		 * in which case we should return EINTR.
		 * Also, we may have received a SIGCANCEL; if so
		 * and we are cancelable we should return EINTR.
		 * We force an immediate EINTR return from
		 * __lwp_park() by turning our parking flag off.
		 */
		if (self->ul_cursig != 0 ||
		    (self->ul_cancelable && self->ul_cancel_pending))
			set_parking_flag(self, 0);
		/*
		 * __lwp_park() will return the residual time in tsp
		 * if we are unparked before the timeout expires.
		 */
		error = __lwp_park(tsp, lwpid);
		set_parking_flag(self, 0);
		lwpid = 0;	/* unpark the other lwp only once */
		/*
		 * We were waked up by cond_signal(), cond_broadcast(),
		 * by an interrupt or timeout (EINTR or ETIME),
		 * or we may just have gotten a spurious wakeup.
		 */
		qp = queue_lock(cvp, CV);
		if (!cv_wake)
			mqp = queue_lock(mp, MX);
		/*
		 * We are no longer on any sleep queue: the wait is over.
		 * The queue lock(s) taken just above are still held on
		 * every break from this loop and are dropped after it.
		 */
		if (self->ul_sleepq == NULL)
			break;
		/*
		 * We are on either the condvar sleep queue or the
		 * mutex sleep queue.  Break out of the sleep if we
		 * were interrupted or we timed out (EINTR or ETIME).
		 * Else this is a spurious wakeup; continue the loop.
		 */
		if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */
			if (error) {
				mp->mutex_waiters = dequeue_self(mqp);
				break;
			}
			tsp = NULL;	/* no more timeout */
		} else if (self->ul_sleepq == qp) {	/* condvar queue */
			if (error) {
				cvp->cond_waiters_user = dequeue_self(qp);
				break;
			}
			/*
			 * Else a spurious wakeup on the condvar queue.
			 * __lwp_park() has already adjusted the timeout.
			 */
		} else {
			thr_panic("cond_sleep_queue(): thread not on queue");
		}
		/*
		 * Going around again: drop the mutex queue lock here;
		 * the condvar queue lock is dropped at the top of the loop.
		 */
		if (!cv_wake)
			queue_unlock(mqp);
	}

	self->ul_sp = 0;
	self->ul_cv_wake = 0;
	ASSERT(self->ul_cvmutex == NULL);
	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
	    self->ul_wchan == NULL);

	/* capture and reset ul_signalled while the queue lock is held */
	signalled = self->ul_signalled;
	self->ul_signalled = 0;
	queue_unlock(qp);
	if (!cv_wake)
		queue_unlock(mqp);

	/*
	 * If we were concurrently cond_signal()d and any of:
	 * received a UNIX signal, were cancelled, or got a timeout,
	 * then perform another cond_signal() to avoid consuming it.
	 */
	if (error && signalled)
		(void) cond_signal(cvp);

	return (error);
}
3065 
3066 int
3067 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3068 {
3069 	ulwp_t *self = curthread;
3070 	int error;
3071 	int merror;
3072 
3073 	/*
3074 	 * The old thread library was programmed to defer signals
3075 	 * while in cond_wait() so that the associated mutex would
3076 	 * be guaranteed to be held when the application signal
3077 	 * handler was invoked.
3078 	 *
3079 	 * We do not behave this way by default; the state of the
3080 	 * associated mutex in the signal handler is undefined.
3081 	 *
3082 	 * To accommodate applications that depend on the old
3083 	 * behavior, the _THREAD_COND_WAIT_DEFER environment
3084 	 * variable can be set to 1 and we will behave in the
3085 	 * old way with respect to cond_wait().
3086 	 */
3087 	if (self->ul_cond_wait_defer)
3088 		sigoff(self);
3089 
3090 	error = cond_sleep_queue(cvp, mp, tsp);
3091 
3092 	/*
3093 	 * Reacquire the mutex.
3094 	 */
3095 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
3096 		error = merror;
3097 
3098 	/*
3099 	 * Take any deferred signal now, after we have reacquired the mutex.
3100 	 */
3101 	if (self->ul_cond_wait_defer)
3102 		sigon(self);
3103 
3104 	return (error);
3105 }
3106 
/*
 * cond_sleep_kernel(): utility function for cond_wait_kernel().
 * See the comment ahead of cond_sleep_queue(), above.
 *
 * The wait is performed by ___lwp_cond_wait().  As with
 * cond_sleep_queue(), the mutex is not reacquired here;
 * cond_wait_kernel() does that.
 * Returns the result of ___lwp_cond_wait().
 */
static int
cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	int mtype = mp->mutex_type;
	ulwp_t *self = curthread;
	int error;

	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
		_ceil_prio_waive();

	self->ul_sp = stkptr();
	self->ul_wchan = cvp;
	/* give up our record of ownership before entering the kernel */
	mp->mutex_owner = 0;
	/* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */
	if (mtype & LOCK_PRIO_INHERIT) {
		mp->mutex_lockw = LOCKCLEAR;
		self->ul_pilocks--;
	}
	/*
	 * ___lwp_cond_wait() returns immediately with EINTR if
	 * set_parking_flag(self,0) is called on this lwp before it
	 * goes to sleep in the kernel.  sigacthandler() calls this
	 * when a deferred signal is noted.  This assures that we don't
	 * get stuck in ___lwp_cond_wait() with all signals blocked
	 * due to taking a deferred signal before going to sleep.
	 */
	set_parking_flag(self, 1);
	/*
	 * A signal or cancellation is already pending:
	 * turn the flag back off so that the wait returns at once.
	 */
	if (self->ul_cursig != 0 ||
	    (self->ul_cancelable && self->ul_cancel_pending))
		set_parking_flag(self, 0);
	error = ___lwp_cond_wait(cvp, mp, tsp, 1);
	set_parking_flag(self, 0);
	self->ul_sp = 0;
	self->ul_wchan = NULL;
	return (error);
}
3147 
3148 int
3149 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3150 {
3151 	ulwp_t *self = curthread;
3152 	int error;
3153 	int merror;
3154 
3155 	/*
3156 	 * See the large comment in cond_wait_queue(), above.
3157 	 */
3158 	if (self->ul_cond_wait_defer)
3159 		sigoff(self);
3160 
3161 	error = cond_sleep_kernel(cvp, mp, tsp);
3162 
3163 	/*
3164 	 * Override the return code from ___lwp_cond_wait()
3165 	 * with any non-zero return code from mutex_lock().
3166 	 * This addresses robust lock failures in particular;
3167 	 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE
3168 	 * errors in order to take corrective action.
3169 	 */
3170 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
3171 		error = merror;
3172 
3173 	/*
3174 	 * Take any deferred signal now, after we have reacquired the mutex.
3175 	 */
3176 	if (self->ul_cond_wait_defer)
3177 		sigon(self);
3178 
3179 	return (error);
3180 }
3181 
3182 /*
3183  * Common code for cond_wait() and cond_timedwait()
3184  */
3185 int
3186 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3187 {
3188 	int mtype = mp->mutex_type;
3189 	hrtime_t begin_sleep = 0;
3190 	ulwp_t *self = curthread;
3191 	uberdata_t *udp = self->ul_uberdata;
3192 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3193 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
3194 	uint8_t rcount;
3195 	int error = 0;
3196 
3197 	/*
3198 	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
3199 	 *	Except in the case of [ETIMEDOUT], all these error checks
3200 	 *	shall act as if they were performed immediately at the
3201 	 *	beginning of processing for the function and shall cause
3202 	 *	an error return, in effect, prior to modifying the state
3203 	 *	of the mutex specified by mutex or the condition variable
3204 	 *	specified by cond.
3205 	 * Therefore, we must return EINVAL now if the timout is invalid.
3206 	 */
3207 	if (tsp != NULL &&
3208 	    (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC))
3209 		return (EINVAL);
3210 
3211 	if (__td_event_report(self, TD_SLEEP, udp)) {
3212 		self->ul_sp = stkptr();
3213 		self->ul_wchan = cvp;
3214 		self->ul_td_evbuf.eventnum = TD_SLEEP;
3215 		self->ul_td_evbuf.eventdata = cvp;
3216 		tdb_event(TD_SLEEP, udp);
3217 		self->ul_sp = 0;
3218 	}
3219 	if (csp) {
3220 		if (tsp)
3221 			tdb_incr(csp->cond_timedwait);
3222 		else
3223 			tdb_incr(csp->cond_wait);
3224 	}
3225 	if (msp)
3226 		begin_sleep = record_hold_time(msp);
3227 	else if (csp)
3228 		begin_sleep = gethrtime();
3229 
3230 	if (self->ul_error_detection) {
3231 		if (!mutex_held(mp))
3232 			lock_error(mp, "cond_wait", cvp, NULL);
3233 		if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0)
3234 			lock_error(mp, "recursive mutex in cond_wait",
3235 			    cvp, NULL);
3236 		if (cvp->cond_type & USYNC_PROCESS) {
3237 			if (!(mtype & USYNC_PROCESS))
3238 				lock_error(mp, "cond_wait", cvp,
3239 				    "condvar process-shared, "
3240 				    "mutex process-private");
3241 		} else {
3242 			if (mtype & USYNC_PROCESS)
3243 				lock_error(mp, "cond_wait", cvp,
3244 				    "condvar process-private, "
3245 				    "mutex process-shared");
3246 		}
3247 	}
3248 
3249 	/*
3250 	 * We deal with recursive mutexes by completely
3251 	 * dropping the lock and restoring the recursion
3252 	 * count after waking up.  This is arguably wrong,
3253 	 * but it obeys the principle of least astonishment.
3254 	 */
3255 	rcount = mp->mutex_rcount;
3256 	mp->mutex_rcount = 0;
3257 	if ((mtype &
3258 	    (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) |
3259 	    (cvp->cond_type & USYNC_PROCESS))
3260 		error = cond_wait_kernel(cvp, mp, tsp);
3261 	else
3262 		error = cond_wait_queue(cvp, mp, tsp);
3263 	mp->mutex_rcount = rcount;
3264 
3265 	if (csp) {
3266 		hrtime_t lapse = gethrtime() - begin_sleep;
3267 		if (tsp == NULL)
3268 			csp->cond_wait_sleep_time += lapse;
3269 		else {
3270 			csp->cond_timedwait_sleep_time += lapse;
3271 			if (error == ETIME)
3272 				tdb_incr(csp->cond_timedwait_timeout);
3273 		}
3274 	}
3275 	return (error);
3276 }
3277 
3278 /*
3279  * cond_wait() is a cancellation point but __cond_wait() is not.
3280  * Internally, libc calls the non-cancellation version.
3281  * Other libraries need to use pthread_setcancelstate(), as appropriate,
3282  * since __cond_wait() is not exported from libc.
3283  */
3284 int
3285 __cond_wait(cond_t *cvp, mutex_t *mp)
3286 {
3287 	ulwp_t *self = curthread;
3288 	uberdata_t *udp = self->ul_uberdata;
3289 	uberflags_t *gflags;
3290 
3291 	/*
3292 	 * Optimize the common case of USYNC_THREAD plus
3293 	 * no error detection, no lock statistics, and no event tracing.
3294 	 */
3295 	if ((gflags = self->ul_schedctl_called) != NULL &&
3296 	    (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted |
3297 	    self->ul_td_events_enable |
3298 	    udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0)
3299 		return (cond_wait_queue(cvp, mp, NULL));
3300 
3301 	/*
3302 	 * Else do it the long way.
3303 	 */
3304 	return (cond_wait_common(cvp, mp, NULL));
3305 }
3306 
3307 #pragma weak _cond_wait = cond_wait
3308 int
3309 cond_wait(cond_t *cvp, mutex_t *mp)
3310 {
3311 	int error;
3312 
3313 	_cancelon();
3314 	error = __cond_wait(cvp, mp);
3315 	if (error == EINTR)
3316 		_canceloff();
3317 	else
3318 		_canceloff_nocancel();
3319 	return (error);
3320 }
3321 
3322 /*
3323  * pthread_cond_wait() is a cancellation point.
3324  */
3325 int
3326 pthread_cond_wait(pthread_cond_t *_RESTRICT_KYWD cvp,
3327 	pthread_mutex_t *_RESTRICT_KYWD mp)
3328 {
3329 	int error;
3330 
3331 	error = cond_wait((cond_t *)cvp, (mutex_t *)mp);
3332 	return ((error == EINTR)? 0 : error);
3333 }
3334 
3335 /*
3336  * cond_timedwait() is a cancellation point but __cond_timedwait() is not.
3337  */
3338 int
3339 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3340 {
3341 	clockid_t clock_id = cvp->cond_clockid;
3342 	timespec_t reltime;
3343 	int error;
3344 
3345 	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES)
3346 		clock_id = CLOCK_REALTIME;
3347 	abstime_to_reltime(clock_id, abstime, &reltime);
3348 	error = cond_wait_common(cvp, mp, &reltime);
3349 	if (error == ETIME && clock_id == CLOCK_HIGHRES) {
3350 		/*
3351 		 * Don't return ETIME if we didn't really get a timeout.
3352 		 * This can happen if we return because someone resets
3353 		 * the system clock.  Just return zero in this case,
3354 		 * giving a spurious wakeup but not a timeout.
3355 		 */
3356 		if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC +
3357 		    abstime->tv_nsec > gethrtime())
3358 			error = 0;
3359 	}
3360 	return (error);
3361 }
3362 
3363 int
3364 cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3365 {
3366 	int error;
3367 
3368 	_cancelon();
3369 	error = __cond_timedwait(cvp, mp, abstime);
3370 	if (error == EINTR)
3371 		_canceloff();
3372 	else
3373 		_canceloff_nocancel();
3374 	return (error);
3375 }
3376 
3377 /*
3378  * pthread_cond_timedwait() is a cancellation point.
3379  */
3380 int
3381 pthread_cond_timedwait(pthread_cond_t *_RESTRICT_KYWD cvp,
3382 	pthread_mutex_t *_RESTRICT_KYWD mp,
3383 	const struct timespec *_RESTRICT_KYWD abstime)
3384 {
3385 	int error;
3386 
3387 	error = cond_timedwait((cond_t *)cvp, (mutex_t *)mp, abstime);
3388 	if (error == ETIME)
3389 		error = ETIMEDOUT;
3390 	else if (error == EINTR)
3391 		error = 0;
3392 	return (error);
3393 }
3394 
3395 /*
3396  * cond_reltimedwait() is a cancellation point but __cond_reltimedwait() is not.
3397  */
3398 int
3399 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3400 {
3401 	timespec_t tslocal = *reltime;
3402 
3403 	return (cond_wait_common(cvp, mp, &tslocal));
3404 }
3405 
3406 int
3407 cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3408 {
3409 	int error;
3410 
3411 	_cancelon();
3412 	error = __cond_reltimedwait(cvp, mp, reltime);
3413 	if (error == EINTR)
3414 		_canceloff();
3415 	else
3416 		_canceloff_nocancel();
3417 	return (error);
3418 }
3419 
3420 int
3421 pthread_cond_reltimedwait_np(pthread_cond_t *_RESTRICT_KYWD cvp,
3422 	pthread_mutex_t *_RESTRICT_KYWD mp,
3423 	const struct timespec *_RESTRICT_KYWD reltime)
3424 {
3425 	int error;
3426 
3427 	error = cond_reltimedwait((cond_t *)cvp, (mutex_t *)mp, reltime);
3428 	if (error == ETIME)
3429 		error = ETIMEDOUT;
3430 	else if (error == EINTR)
3431 		error = 0;
3432 	return (error);
3433 }
3434 
3435 #pragma weak pthread_cond_signal = cond_signal
3436 #pragma weak _cond_signal = cond_signal
3437 int
3438 cond_signal(cond_t *cvp)
3439 {
3440 	ulwp_t *self = curthread;
3441 	uberdata_t *udp = self->ul_uberdata;
3442 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3443 	int error = 0;
3444 	int more;
3445 	lwpid_t lwpid;
3446 	queue_head_t *qp;
3447 	mutex_t *mp;
3448 	queue_head_t *mqp;
3449 	ulwp_t **ulwpp;
3450 	ulwp_t *ulwp;
3451 	ulwp_t *prev;
3452 
3453 	if (csp)
3454 		tdb_incr(csp->cond_signal);
3455 
3456 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3457 		error = _lwp_cond_signal(cvp);
3458 
3459 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3460 		return (error);
3461 
3462 	/*
3463 	 * Move someone from the condvar sleep queue to the mutex sleep
3464 	 * queue for the mutex that he will acquire on being waked up.
3465 	 * We can do this only if we own the mutex he will acquire.
3466 	 * If we do not own the mutex, or if his ul_cv_wake flag
3467 	 * is set, just dequeue and unpark him.
3468 	 */
3469 	qp = queue_lock(cvp, CV);
3470 	ulwpp = queue_slot(qp, &prev, &more);
3471 	cvp->cond_waiters_user = more;
3472 	if (ulwpp == NULL) {	/* no one on the sleep queue */
3473 		queue_unlock(qp);
3474 		return (error);
3475 	}
3476 	ulwp = *ulwpp;
3477 
3478 	/*
3479 	 * Inform the thread that he was the recipient of a cond_signal().
3480 	 * This lets him deal with cond_signal() and, concurrently,
3481 	 * one or more of a cancellation, a UNIX signal, or a timeout.
3482 	 * These latter conditions must not consume a cond_signal().
3483 	 */
3484 	ulwp->ul_signalled = 1;
3485 
3486 	/*
3487 	 * Dequeue the waiter but leave his ul_sleepq non-NULL
3488 	 * while we move him to the mutex queue so that he can
3489 	 * deal properly with spurious wakeups.
3490 	 */
3491 	queue_unlink(qp, ulwpp, prev);
3492 
3493 	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
3494 	ulwp->ul_cvmutex = NULL;
3495 	ASSERT(mp != NULL);
3496 
3497 	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3498 		/* just wake him up */
3499 		lwpid = ulwp->ul_lwpid;
3500 		no_preempt(self);
3501 		ulwp->ul_sleepq = NULL;
3502 		ulwp->ul_wchan = NULL;
3503 		queue_unlock(qp);
3504 		(void) __lwp_unpark(lwpid);
3505 		preempt(self);
3506 	} else {
3507 		/* move him to the mutex queue */
3508 		mqp = queue_lock(mp, MX);
3509 		enqueue(mqp, ulwp, 0);
3510 		mp->mutex_waiters = 1;
3511 		queue_unlock(mqp);
3512 		queue_unlock(qp);
3513 	}
3514 
3515 	return (error);
3516 }
3517 
3518 /*
3519  * Utility function called by mutex_wakeup_all(), cond_broadcast(),
3520  * and rw_queue_release() to (re)allocate a big buffer to hold the
3521  * lwpids of all the threads to be set running after they are removed
3522  * from their sleep queues.  Since we are holding a queue lock, we
3523  * cannot call any function that might acquire a lock.  mmap(), munmap(),
3524  * lwp_unpark_all() are simple system calls and are safe in this regard.
3525  */
3526 lwpid_t *
3527 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr)
3528 {
3529 	/*
3530 	 * Allocate NEWLWPS ids on the first overflow.
3531 	 * Double the allocation each time after that.
3532 	 */
3533 	int nlwpid = *nlwpid_ptr;
3534 	int maxlwps = *maxlwps_ptr;
3535 	int first_allocation;
3536 	int newlwps;
3537 	void *vaddr;
3538 
3539 	ASSERT(nlwpid == maxlwps);
3540 
3541 	first_allocation = (maxlwps == MAXLWPS);
3542 	newlwps = first_allocation? NEWLWPS : 2 * maxlwps;
3543 	vaddr = mmap(NULL, newlwps * sizeof (lwpid_t),
3544 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
3545 
3546 	if (vaddr == MAP_FAILED) {
3547 		/*
3548 		 * Let's hope this never happens.
3549 		 * If it does, then we have a terrible
3550 		 * thundering herd on our hands.
3551 		 */
3552 		(void) __lwp_unpark_all(lwpid, nlwpid);
3553 		*nlwpid_ptr = 0;
3554 	} else {
3555 		(void) memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t));
3556 		if (!first_allocation)
3557 			(void) munmap((caddr_t)lwpid,
3558 			    maxlwps * sizeof (lwpid_t));
3559 		lwpid = vaddr;
3560 		*maxlwps_ptr = newlwps;
3561 	}
3562 
3563 	return (lwpid);
3564 }
3565 
3566 #pragma weak pthread_cond_broadcast = cond_broadcast
3567 #pragma weak _cond_broadcast = cond_broadcast
3568 int
3569 cond_broadcast(cond_t *cvp)
3570 {
3571 	ulwp_t *self = curthread;
3572 	uberdata_t *udp = self->ul_uberdata;
3573 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3574 	int error = 0;
3575 	queue_head_t *qp;
3576 	queue_root_t *qrp;
3577 	mutex_t *mp;
3578 	mutex_t *mp_cache = NULL;
3579 	queue_head_t *mqp = NULL;
3580 	ulwp_t *ulwp;
3581 	int nlwpid = 0;
3582 	int maxlwps = MAXLWPS;
3583 	lwpid_t buffer[MAXLWPS];
3584 	lwpid_t *lwpid = buffer;
3585 
3586 	if (csp)
3587 		tdb_incr(csp->cond_broadcast);
3588 
3589 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3590 		error = _lwp_cond_broadcast(cvp);
3591 
3592 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3593 		return (error);
3594 
3595 	/*
3596 	 * Move everyone from the condvar sleep queue to the mutex sleep
3597 	 * queue for the mutex that they will acquire on being waked up.
3598 	 * We can do this only if we own the mutex they will acquire.
3599 	 * If we do not own the mutex, or if their ul_cv_wake flag
3600 	 * is set, just dequeue and unpark them.
3601 	 *
3602 	 * We keep track of lwpids that are to be unparked in lwpid[].
3603 	 * __lwp_unpark_all() is called to unpark all of them after
3604 	 * they have been removed from the sleep queue and the sleep
3605 	 * queue lock has been dropped.  If we run out of space in our
3606 	 * on-stack buffer, we need to allocate more but we can't call
3607 	 * lmalloc() because we are holding a queue lock when the overflow
3608 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
3609 	 * either because the application may have allocated a small
3610 	 * stack and we don't want to overrun the stack.  So we call
3611 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
3612 	 * system call directly since that path acquires no locks.
3613 	 */
3614 	qp = queue_lock(cvp, CV);
3615 	cvp->cond_waiters_user = 0;
3616 	for (;;) {
3617 		if ((qrp = qp->qh_root) == NULL ||
3618 		    (ulwp = qrp->qr_head) == NULL)
3619 			break;
3620 		ASSERT(ulwp->ul_wchan == cvp);
3621 		queue_unlink(qp, &qrp->qr_head, NULL);
3622 		mp = ulwp->ul_cvmutex;		/* his mutex */
3623 		ulwp->ul_cvmutex = NULL;
3624 		ASSERT(mp != NULL);
3625 		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3626 			/* just wake him up */
3627 			ulwp->ul_sleepq = NULL;
3628 			ulwp->ul_wchan = NULL;
3629 			if (nlwpid == maxlwps)
3630 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
3631 			lwpid[nlwpid++] = ulwp->ul_lwpid;
3632 		} else {
3633 			/* move him to the mutex queue */
3634 			if (mp != mp_cache) {
3635 				mp_cache = mp;
3636 				if (mqp != NULL)
3637 					queue_unlock(mqp);
3638 				mqp = queue_lock(mp, MX);
3639 			}
3640 			enqueue(mqp, ulwp, 0);
3641 			mp->mutex_waiters = 1;
3642 		}
3643 	}
3644 	if (mqp != NULL)
3645 		queue_unlock(mqp);
3646 	if (nlwpid == 0) {
3647 		queue_unlock(qp);
3648 	} else {
3649 		no_preempt(self);
3650 		queue_unlock(qp);
3651 		if (nlwpid == 1)
3652 			(void) __lwp_unpark(lwpid[0]);
3653 		else
3654 			(void) __lwp_unpark_all(lwpid, nlwpid);
3655 		preempt(self);
3656 	}
3657 	if (lwpid != buffer)
3658 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
3659 	return (error);
3660 }
3661 
3662 #pragma weak pthread_cond_destroy = cond_destroy
3663 int
3664 cond_destroy(cond_t *cvp)
3665 {
3666 	cvp->cond_magic = 0;
3667 	tdb_sync_obj_deregister(cvp);
3668 	return (0);
3669 }
3670 
3671 #if defined(THREAD_DEBUG)
3672 void
3673 assert_no_libc_locks_held(void)
3674 {
3675 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
3676 }
3677 
3678 /* protected by link_lock */
3679 uint64_t spin_lock_spin;
3680 uint64_t spin_lock_spin2;
3681 uint64_t spin_lock_sleep;
3682 uint64_t spin_lock_wakeup;
3683 
3684 /*
3685  * Record spin lock statistics.
3686  * Called by a thread exiting itself in thrp_exit().
3687  * Also called via atexit() from the thread calling
3688  * exit() to do all the other threads as well.
3689  */
3690 void
3691 record_spin_locks(ulwp_t *ulwp)
3692 {
3693 	spin_lock_spin += ulwp->ul_spin_lock_spin;
3694 	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
3695 	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
3696 	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
3697 	ulwp->ul_spin_lock_spin = 0;
3698 	ulwp->ul_spin_lock_spin2 = 0;
3699 	ulwp->ul_spin_lock_sleep = 0;
3700 	ulwp->ul_spin_lock_wakeup = 0;
3701 }
3702 
3703 /*
3704  * atexit function:  dump the queue statistics to stderr.
3705  */
3706 #include <stdio.h>
3707 void
3708 dump_queue_statistics(void)
3709 {
3710 	uberdata_t *udp = curthread->ul_uberdata;
3711 	queue_head_t *qp;
3712 	int qn;
3713 	uint64_t spin_lock_total = 0;
3714 
3715 	if (udp->queue_head == NULL || thread_queue_dump == 0)
3716 		return;
3717 
3718 	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
3719 	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
3720 		return;
3721 	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
3722 		if (qp->qh_lockcount == 0)
3723 			continue;
3724 		spin_lock_total += qp->qh_lockcount;
3725 		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
3726 		    (u_longlong_t)qp->qh_lockcount,
3727 		    qp->qh_qmax, qp->qh_hmax) < 0)
3728 			return;
3729 	}
3730 
3731 	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
3732 	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
3733 		return;
3734 	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
3735 		if (qp->qh_lockcount == 0)
3736 			continue;
3737 		spin_lock_total += qp->qh_lockcount;
3738 		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
3739 		    (u_longlong_t)qp->qh_lockcount,
3740 		    qp->qh_qmax, qp->qh_hmax) < 0)
3741 			return;
3742 	}
3743 
3744 	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
3745 	    (u_longlong_t)spin_lock_total);
3746 	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
3747 	    (u_longlong_t)spin_lock_spin);
3748 	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
3749 	    (u_longlong_t)spin_lock_spin2);
3750 	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
3751 	    (u_longlong_t)spin_lock_sleep);
3752 	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
3753 	    (u_longlong_t)spin_lock_wakeup);
3754 }
3755 #endif
3756