xref: /titanic_52/usr/src/lib/libc/port/threads/synch.c (revision bea83d026ee1bd1b2a2419e1d0232f107a5d7d9b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/sdt.h>
30 
31 #include "lint.h"
32 #include "thr_uberdata.h"
33 
34 /*
35  * This mutex is initialized to be held by lwp#1.
36  * It is used to block a thread that has returned from a mutex_lock()
37  * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
38  */
39 mutex_t	stall_mutex = DEFAULTMUTEX;
40 
41 static int shared_mutex_held(mutex_t *);
42 static int mutex_unlock_internal(mutex_t *, int);
43 static int mutex_queuelock_adaptive(mutex_t *);
44 static void mutex_wakeup_all(mutex_t *);
45 
46 /*
47  * Lock statistics support functions.
48  */
49 void
50 record_begin_hold(tdb_mutex_stats_t *msp)
51 {
52 	tdb_incr(msp->mutex_lock);
53 	msp->mutex_begin_hold = gethrtime();
54 }
55 
56 hrtime_t
57 record_hold_time(tdb_mutex_stats_t *msp)
58 {
59 	hrtime_t now = gethrtime();
60 
61 	if (msp->mutex_begin_hold)
62 		msp->mutex_hold_time += now - msp->mutex_begin_hold;
63 	msp->mutex_begin_hold = 0;
64 	return (now);
65 }
66 
67 /*
68  * Called once at library initialization.
69  */
70 void
71 mutex_setup(void)
72 {
73 	if (set_lock_byte(&stall_mutex.mutex_lockw))
74 		thr_panic("mutex_setup() cannot acquire stall_mutex");
75 	stall_mutex.mutex_owner = (uintptr_t)curthread;
76 }
77 
78 /*
79  * The default spin count of 1000 is experimentally determined.
80  * On sun4u machines with any number of processors it could be raised
81  * to 10,000, but experimentally that makes almost no difference.
82  * The environment variable:
83  *	_THREAD_ADAPTIVE_SPIN=count
84  * can be used to override and set the count in the range [0 .. 1,000,000].
85  */
86 int	thread_adaptive_spin = 1000;
87 uint_t	thread_max_spinners = 100;
88 int	thread_queue_verify = 0;
89 static	int	ncpus;
90 
91 /*
92  * Distinguish spinning for queue locks from spinning for regular locks.
93  * We try harder to acquire queue locks by spinning.
94  * The environment variable:
95  *	_THREAD_QUEUE_SPIN=count
96  * can be used to override and set the count in the range [0 .. 1,000,000].
97  */
98 int	thread_queue_spin = 10000;
99 
100 #define	ALL_ATTRIBUTES				\
101 	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
102 	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
103 	LOCK_ROBUST)
104 
105 /*
106  * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
107  * augmented by zero or more of the flags:
108  *	LOCK_RECURSIVE
109  *	LOCK_ERRORCHECK
110  *	LOCK_PRIO_INHERIT
111  *	LOCK_PRIO_PROTECT
112  *	LOCK_ROBUST
113  */
114 #pragma weak _private_mutex_init = __mutex_init
115 #pragma weak mutex_init = __mutex_init
116 #pragma weak _mutex_init = __mutex_init
117 /* ARGSUSED2 */
118 int
119 __mutex_init(mutex_t *mp, int type, void *arg)
120 {
121 	int basetype = (type & ~ALL_ATTRIBUTES);
122 	int error = 0;
123 
124 	if (basetype == USYNC_PROCESS_ROBUST) {
125 		/*
126 		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
127 		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
128 		 * retain the USYNC_PROCESS_ROBUST flag so we can return
129 		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
130 		 * mutexes will ever draw ELOCKUNMAPPED).
131 		 */
132 		type |= (USYNC_PROCESS | LOCK_ROBUST);
133 		basetype = USYNC_PROCESS;
134 	}
135 
136 	if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) ||
137 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
138 	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) {
139 		error = EINVAL;
140 	} else if (type & LOCK_ROBUST) {
141 		/*
142 		 * Callers of mutex_init() with the LOCK_ROBUST attribute
143 		 * are required to pass an initially all-zero mutex.
144 		 * Multiple calls to mutex_init() are allowed; all but
145 		 * the first return EBUSY.  A call to mutex_init() is
146 		 * allowed to make an inconsistent robust lock consistent
147 		 * (for historical usage, even though the proper interface
148 		 * for this is mutex_consistent()).  Note that we use
149 		 * atomic_or_16() to set the LOCK_INITED flag so as
150 		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
151 		 */
152 		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
153 		if (!(mp->mutex_flag & LOCK_INITED)) {
154 			mp->mutex_type = (uint8_t)type;
155 			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
156 			mp->mutex_magic = MUTEX_MAGIC;
157 		} else if (type != mp->mutex_type ||
158 		    ((type & LOCK_PRIO_PROTECT) &&
159 		    mp->mutex_ceiling != (*(int *)arg))) {
160 			error = EINVAL;
161 		} else if (__mutex_consistent(mp) != 0) {
162 			error = EBUSY;
163 		}
164 		/* register a process robust mutex with the kernel */
165 		if (basetype == USYNC_PROCESS)
166 			register_lock(mp);
167 	} else {
168 		(void) _memset(mp, 0, sizeof (*mp));
169 		mp->mutex_type = (uint8_t)type;
170 		mp->mutex_flag = LOCK_INITED;
171 		mp->mutex_magic = MUTEX_MAGIC;
172 	}
173 
174 	if (error == 0 && (type & LOCK_PRIO_PROTECT))
175 		mp->mutex_ceiling = (uint8_t)(*(int *)arg);
176 
177 	return (error);
178 }
179 
180 /*
181  * Delete mp from list of ceil mutexes owned by curthread.
182  * Return 1 if the head of the chain was updated.
183  */
184 int
185 _ceil_mylist_del(mutex_t *mp)
186 {
187 	ulwp_t *self = curthread;
188 	mxchain_t **mcpp;
189 	mxchain_t *mcp;
190 
191 	mcpp = &self->ul_mxchain;
192 	while ((*mcpp)->mxchain_mx != mp)
193 		mcpp = &(*mcpp)->mxchain_next;
194 	mcp = *mcpp;
195 	*mcpp = mcp->mxchain_next;
196 	lfree(mcp, sizeof (*mcp));
197 	return (mcpp == &self->ul_mxchain);
198 }
199 
200 /*
201  * Add mp to head of list of ceil mutexes owned by curthread.
202  * Return ENOMEM if no memory could be allocated.
203  */
204 int
205 _ceil_mylist_add(mutex_t *mp)
206 {
207 	ulwp_t *self = curthread;
208 	mxchain_t *mcp;
209 
210 	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
211 		return (ENOMEM);
212 	mcp->mxchain_mx = mp;
213 	mcp->mxchain_next = self->ul_mxchain;
214 	self->ul_mxchain = mcp;
215 	return (0);
216 }
217 
218 /*
219  * Inherit priority from ceiling.  The inheritance impacts the effective
220  * priority, not the assigned priority.  See _thread_setschedparam_main().
221  */
222 void
223 _ceil_prio_inherit(int ceil)
224 {
225 	ulwp_t *self = curthread;
226 	struct sched_param param;
227 
228 	(void) _memset(&param, 0, sizeof (param));
229 	param.sched_priority = ceil;
230 	if (_thread_setschedparam_main(self->ul_lwpid,
231 	    self->ul_policy, &param, PRIO_INHERIT)) {
232 		/*
233 		 * Panic, since it is unclear what error code to return.
234 		 * If we ever do return the error codes from the routine
235 		 * called above, the man page will need to be updated...
236 		 */
237 		thr_panic("_thread_setschedparam_main() fails");
238 	}
239 }
240 
241 /*
242  * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
243  * if holding at least one ceiling lock.  If no ceiling locks are held at this
244  * point, disinherit completely, reverting to the assigned priority.
245  */
246 void
247 _ceil_prio_waive(void)
248 {
249 	ulwp_t *self = curthread;
250 	struct sched_param param;
251 
252 	(void) _memset(&param, 0, sizeof (param));
253 	if (self->ul_mxchain == NULL) {
254 		/*
255 		 * No ceiling locks held.  Zero the epri and revert to ul_pri.
256 		 * Since the thread's hash lock is not held, we cannot simply
257 		 * read ul_pri here; the called routine does that for us...
258 		 */
259 		param.sched_priority = self->ul_pri;	/* ignored */
260 		if (_thread_setschedparam_main(self->ul_lwpid,
261 		    self->ul_policy, &param, PRIO_DISINHERIT))
262 			thr_panic("_thread_setschedparam_main() fails");
263 	} else {
264 		/*
265 		 * Set priority to that of the mutex at the head
266 		 * of the ceilmutex chain.
267 		 */
268 		param.sched_priority =
269 		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
270 		if (_thread_setschedparam_main(self->ul_lwpid,
271 		    self->ul_policy, &param, PRIO_INHERIT))
272 			thr_panic("_thread_setschedparam_main() fails");
273 	}
274 }
275 
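/*
 * Note on the lock word (inferred from the accessors below): the 32-bit
 * mutex_lockword holds the lock byte (LOCKMASK), the waiters indication
 * (WAITERMASK) and a count of spinning threads (SPINNERMASK, accessed
 * via SPINNERSHIFT); mutex_lockw refers to just the lock byte.
 */
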
276 /*
277  * Clear the lock byte.  Retain the waiters byte and the spinners byte.
278  * Return the old value of the lock word.
279  */
280 static uint32_t
281 clear_lockbyte(volatile uint32_t *lockword)
282 {
283 	uint32_t old;
284 	uint32_t new;
285 
286 	do {
287 		old = *lockword;
288 		new = old & ~LOCKMASK;
289 	} while (atomic_cas_32(lockword, old, new) != old);
290 
291 	return (old);
292 }
293 
294 /*
295  * Increment the spinners count in the mutex lock word.
296  * Return 0 on success.  Return -1 if the count would overflow.
297  */
298 static int
299 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
300 {
301 	uint32_t old;
302 	uint32_t new;
303 
304 	do {
305 		old = *lockword;
306 		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
307 			return (-1);
308 		new = old + (1 << SPINNERSHIFT);
309 	} while (atomic_cas_32(lockword, old, new) != old);
310 
311 	return (0);
312 }
313 
314 /*
315  * Decrement the spinners count in the mutex lock word.
316  * Return the new value of the lock word.
317  */
318 static uint32_t
319 spinners_decr(volatile uint32_t *lockword)
320 {
321 	uint32_t old;
322 	uint32_t new;
323 
324 	do {
325 		new = old = *lockword;
326 		if (new & SPINNERMASK)
327 			new -= (1 << SPINNERSHIFT);
328 	} while (atomic_cas_32(lockword, old, new) != old);
329 
330 	return (new);
331 }
332 
333 /*
334  * Non-preemptive spin locks.  Used by queue_lock().
335  * No lock statistics are gathered for these locks.
336  * No DTrace probes are provided for these locks.
337  */
338 void
339 spin_lock_set(mutex_t *mp)
340 {
341 	ulwp_t *self = curthread;
342 
343 	no_preempt(self);
344 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
345 		mp->mutex_owner = (uintptr_t)self;
346 		return;
347 	}
348 	/*
349 	 * Spin for a while, attempting to acquire the lock.
350 	 */
351 	if (self->ul_spin_lock_spin != UINT_MAX)
352 		self->ul_spin_lock_spin++;
353 	if (mutex_queuelock_adaptive(mp) == 0 ||
354 	    set_lock_byte(&mp->mutex_lockw) == 0) {
355 		mp->mutex_owner = (uintptr_t)self;
356 		return;
357 	}
358 	/*
359 	 * Try harder if we were previously at a no-preemption level.
360 	 */
361 	if (self->ul_preempt > 1) {
362 		if (self->ul_spin_lock_spin2 != UINT_MAX)
363 			self->ul_spin_lock_spin2++;
364 		if (mutex_queuelock_adaptive(mp) == 0 ||
365 		    set_lock_byte(&mp->mutex_lockw) == 0) {
366 			mp->mutex_owner = (uintptr_t)self;
367 			return;
368 		}
369 	}
370 	/*
371 	 * Give up and block in the kernel for the mutex.
372 	 */
373 	if (self->ul_spin_lock_sleep != UINT_MAX)
374 		self->ul_spin_lock_sleep++;
375 	(void) ___lwp_mutex_timedlock(mp, NULL);
376 	mp->mutex_owner = (uintptr_t)self;
377 }
378 
379 void
380 spin_lock_clear(mutex_t *mp)
381 {
382 	ulwp_t *self = curthread;
383 
384 	mp->mutex_owner = 0;
385 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
386 		(void) ___lwp_mutex_wakeup(mp, 0);
387 		if (self->ul_spin_lock_wakeup != UINT_MAX)
388 			self->ul_spin_lock_wakeup++;
389 	}
390 	preempt(self);
391 }
392 
393 /*
394  * Allocate the sleep queue hash table.
395  */
396 void
397 queue_alloc(void)
398 {
399 	ulwp_t *self = curthread;
400 	uberdata_t *udp = self->ul_uberdata;
401 	mutex_t *mp;
402 	void *data;
403 	int i;
404 
405 	/*
406 	 * No locks are needed; we call here only when single-threaded.
407 	 */
408 	ASSERT(self == udp->ulwp_one);
409 	ASSERT(!udp->uberflags.uf_mt);
410 	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
411 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
412 	    == MAP_FAILED)
413 		thr_panic("cannot allocate thread queue_head table");
414 	udp->queue_head = (queue_head_t *)data;
415 	for (i = 0; i < 2 * QHASHSIZE; i++) {
416 		mp = &udp->queue_head[i].qh_lock;
417 		mp->mutex_flag = LOCK_INITED;
418 		mp->mutex_magic = MUTEX_MAGIC;
419 	}
420 }
421 
422 #if defined(THREAD_DEBUG)
423 
424 /*
425  * Debugging: verify correctness of a sleep queue.
426  */
427 void
428 QVERIFY(queue_head_t *qp)
429 {
430 	ulwp_t *self = curthread;
431 	uberdata_t *udp = self->ul_uberdata;
432 	ulwp_t *ulwp;
433 	ulwp_t *prev;
434 	uint_t index;
435 	uint32_t cnt = 0;
436 	char qtype;
437 	void *wchan;
438 
439 	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
440 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
441 	ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) ||
442 	    (qp->qh_head == NULL && qp->qh_tail == NULL));
443 	if (!thread_queue_verify)
444 		return;
445 	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
446 	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
447 	for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL;
448 	    prev = ulwp, ulwp = ulwp->ul_link, cnt++) {
449 		ASSERT(ulwp->ul_qtype == qtype);
450 		ASSERT(ulwp->ul_wchan != NULL);
451 		ASSERT(ulwp->ul_sleepq == qp);
452 		wchan = ulwp->ul_wchan;
453 		index = QUEUE_HASH(wchan, qtype);
454 		ASSERT(&udp->queue_head[index] == qp);
455 	}
456 	ASSERT(qp->qh_tail == prev);
457 	ASSERT(qp->qh_qlen == cnt);
458 }
459 
460 #else	/* THREAD_DEBUG */
461 
462 #define	QVERIFY(qp)
463 
464 #endif	/* THREAD_DEBUG */
465 
466 /*
467  * Acquire a queue head.
468  */
469 queue_head_t *
470 queue_lock(void *wchan, int qtype)
471 {
472 	uberdata_t *udp = curthread->ul_uberdata;
473 	queue_head_t *qp;
474 
475 	ASSERT(qtype == MX || qtype == CV);
476 
477 	/*
478 	 * It is possible that we could be called while still single-threaded.
479 	 * If so, we call queue_alloc() to allocate the queue_head[] array.
480 	 */
481 	if ((qp = udp->queue_head) == NULL) {
482 		queue_alloc();
483 		qp = udp->queue_head;
484 	}
485 	qp += QUEUE_HASH(wchan, qtype);
486 	spin_lock_set(&qp->qh_lock);
487 	/*
488 	 * Even at one increment per nanosecond, qh_lockcount would not wrap for about 584 years.
489 	 * Were we to care about this, we could peg the value at UINT64_MAX.
490 	 */
491 	qp->qh_lockcount++;
492 	QVERIFY(qp);
493 	return (qp);
494 }
495 
496 /*
497  * Release a queue head.
498  */
499 void
500 queue_unlock(queue_head_t *qp)
501 {
502 	QVERIFY(qp);
503 	spin_lock_clear(&qp->qh_lock);
504 }
505 
506 /*
507  * For rwlock queueing, we must queue writers ahead of readers of the
508  * same priority.  We do this by making writers appear to have a half
509  * point higher priority for purposes of priority comparisons below.
510  */
511 #define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)
512 
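/*
 * Put the specified thread on the sleep queue for wchan, keeping the
 * queue in priority order; see the LIFO/FIFO discussion in the comment
 * inside the function.
 */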
513 void
514 enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
515 {
516 	ulwp_t **ulwpp;
517 	ulwp_t *next;
518 	int pri = CMP_PRIO(ulwp);
519 	int force_fifo = (qtype & FIFOQ);
520 	int do_fifo;
521 
522 	qtype &= ~FIFOQ;
523 	ASSERT(qtype == MX || qtype == CV);
524 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
525 	ASSERT(ulwp->ul_sleepq != qp);
526 
527 	/*
528 	 * LIFO queue ordering is unfair and can lead to starvation,
529 	 * but it gives better performance for heavily contended locks.
530 	 * We use thread_queue_fifo (range is 0..8) to determine
531 	 * the frequency of FIFO vs LIFO queuing:
532 	 *	0 : every 256th time	(almost always LIFO)
533 	 *	1 : every 128th time
534 	 *	2 : every 64th  time
535 	 *	3 : every 32nd  time
536 	 *	4 : every 16th  time	(the default value, mostly LIFO)
537 	 *	5 : every 8th   time
538 	 *	6 : every 4th   time
539 	 *	7 : every 2nd   time
540 	 *	8 : every time		(never LIFO, always FIFO)
541 	 * Note that there is always some degree of FIFO ordering.
542 	 * This breaks livelock conditions that occur in applications
543 	 * that are written assuming (incorrectly) that threads acquire
544 	 * locks fairly, that is, in roughly round-robin order.
545 	 * In any event, the queue is maintained in priority order.
546 	 *
547 	 * If we are given the FIFOQ flag in qtype, fifo queueing is forced.
548 	 * SUSV3 requires this for semaphores.
549 	 */
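	/*
	 * With ul_queue_fifo == f, the expression below is true once every
	 * 2^(8-f) calls, which gives the frequencies tabulated above (for
	 * example, every 16th enqueue at the default value of 4).
	 */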
550 	do_fifo = (force_fifo ||
551 	    ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0);
552 
553 	if (qp->qh_head == NULL) {
554 		/*
555 		 * The queue is empty.  LIFO/FIFO doesn't matter.
556 		 */
557 		ASSERT(qp->qh_tail == NULL);
558 		ulwpp = &qp->qh_head;
559 	} else if (do_fifo) {
560 		/*
561 		 * Enqueue after the last thread whose priority is greater
562 		 * than or equal to the priority of the thread being queued.
563 		 * Attempt first to go directly onto the tail of the queue.
564 		 */
565 		if (pri <= CMP_PRIO(qp->qh_tail))
566 			ulwpp = &qp->qh_tail->ul_link;
567 		else {
568 			for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
569 			    ulwpp = &next->ul_link)
570 				if (pri > CMP_PRIO(next))
571 					break;
572 		}
573 	} else {
574 		/*
575 		 * Enqueue before the first thread whose priority is less
576 		 * than or equal to the priority of the thread being queued.
577 		 * Hopefully we can go directly onto the head of the queue.
578 		 */
579 		for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
580 		    ulwpp = &next->ul_link)
581 			if (pri >= CMP_PRIO(next))
582 				break;
583 	}
584 	if ((ulwp->ul_link = *ulwpp) == NULL)
585 		qp->qh_tail = ulwp;
586 	*ulwpp = ulwp;
587 
588 	ulwp->ul_sleepq = qp;
589 	ulwp->ul_wchan = wchan;
590 	ulwp->ul_qtype = qtype;
591 	if (qp->qh_qmax < ++qp->qh_qlen)
592 		qp->qh_qmax = qp->qh_qlen;
593 }
594 
595 /*
596  * Return a pointer to the queue slot of the
597  * highest priority thread on the queue.
598  * On return, prevp, if not NULL, will contain a pointer
599  * to the thread's predecessor on the queue
600  */
601 static ulwp_t **
602 queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp)
603 {
604 	ulwp_t **ulwpp;
605 	ulwp_t *ulwp;
606 	ulwp_t *prev = NULL;
607 	ulwp_t **suspp = NULL;
608 	ulwp_t *susprev;
609 
610 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
611 
612 	/*
613 	 * Find a waiter on the sleep queue.
614 	 */
615 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
616 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
617 		if (ulwp->ul_wchan == wchan) {
618 			if (!ulwp->ul_stop)
619 				break;
620 			/*
621 			 * Try not to return a suspended thread.
622 			 * This mimics the old libthread's behavior.
623 			 */
624 			if (suspp == NULL) {
625 				suspp = ulwpp;
626 				susprev = prev;
627 			}
628 		}
629 	}
630 
631 	if (ulwp == NULL && suspp != NULL) {
632 		ulwp = *(ulwpp = suspp);
633 		prev = susprev;
634 		suspp = NULL;
635 	}
636 	if (ulwp == NULL) {
637 		if (more != NULL)
638 			*more = 0;
639 		return (NULL);
640 	}
641 
642 	if (prevp != NULL)
643 		*prevp = prev;
644 	if (more == NULL)
645 		return (ulwpp);
646 
647 	/*
648 	 * Scan the remainder of the queue for another waiter.
649 	 */
650 	if (suspp != NULL) {
651 		*more = 1;
652 		return (ulwpp);
653 	}
654 	for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) {
655 		if (ulwp->ul_wchan == wchan) {
656 			*more = 1;
657 			return (ulwpp);
658 		}
659 	}
660 
661 	*more = 0;
662 	return (ulwpp);
663 }
664 
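/*
 * Unlink the thread pointed to by *ulwpp from the sleep queue.
 * 'prev' is its predecessor, needed to repair qh_tail when the
 * last entry is removed.  Return the unlinked thread.
 */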
665 ulwp_t *
666 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
667 {
668 	ulwp_t *ulwp;
669 
670 	ulwp = *ulwpp;
671 	*ulwpp = ulwp->ul_link;
672 	ulwp->ul_link = NULL;
673 	if (qp->qh_tail == ulwp)
674 		qp->qh_tail = prev;
675 	qp->qh_qlen--;
676 	ulwp->ul_sleepq = NULL;
677 	ulwp->ul_wchan = NULL;
678 
679 	return (ulwp);
680 }
681 
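/*
 * Dequeue the highest priority thread sleeping on wchan.
 * If 'more' is not NULL, set *more to indicate whether any
 * other waiters for wchan remain on the queue.
 */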
682 ulwp_t *
683 dequeue(queue_head_t *qp, void *wchan, int *more)
684 {
685 	ulwp_t **ulwpp;
686 	ulwp_t *prev;
687 
688 	if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL)
689 		return (NULL);
690 	return (queue_unlink(qp, ulwpp, prev));
691 }
692 
693 /*
694  * Return a pointer to the highest priority thread sleeping on wchan.
695  */
696 ulwp_t *
697 queue_waiter(queue_head_t *qp, void *wchan)
698 {
699 	ulwp_t **ulwpp;
700 
701 	if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL)
702 		return (NULL);
703 	return (*ulwpp);
704 }
705 
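/*
 * Take the calling thread off the sleep queue.  Return 1 if any other
 * threads are still waiting on wchan, else 0, so that the caller can
 * update the waiters indication of the associated synchronization object.
 */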
706 uint8_t
707 dequeue_self(queue_head_t *qp, void *wchan)
708 {
709 	ulwp_t *self = curthread;
710 	ulwp_t **ulwpp;
711 	ulwp_t *ulwp;
712 	ulwp_t *prev = NULL;
713 	int found = 0;
714 	int more = 0;
715 
716 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
717 
718 	/* find self on the sleep queue */
719 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
720 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
721 		if (ulwp == self) {
722 			/* dequeue ourself */
723 			ASSERT(self->ul_wchan == wchan);
724 			(void) queue_unlink(qp, ulwpp, prev);
725 			self->ul_cvmutex = NULL;
726 			self->ul_cv_wake = 0;
727 			found = 1;
728 			break;
729 		}
730 		if (ulwp->ul_wchan == wchan)
731 			more = 1;
732 	}
733 
734 	if (!found)
735 		thr_panic("dequeue_self(): curthread not found on queue");
736 
737 	if (more)
738 		return (1);
739 
740 	/* scan the remainder of the queue for another waiter */
741 	for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) {
742 		if (ulwp->ul_wchan == wchan)
743 			return (1);
744 	}
745 
746 	return (0);
747 }
748 
749 /*
750  * Called from call_user_handler() and _thrp_suspend() to take
751  * ourself off of our sleep queue so we can grab locks.
752  */
753 void
754 unsleep_self(void)
755 {
756 	ulwp_t *self = curthread;
757 	queue_head_t *qp;
758 
759 	/*
760 	 * Calling enter_critical()/exit_critical() here would lead
761 	 * to recursion.  Just manipulate self->ul_critical directly.
762 	 */
763 	self->ul_critical++;
764 	while (self->ul_sleepq != NULL) {
765 		qp = queue_lock(self->ul_wchan, self->ul_qtype);
766 		/*
767 		 * We may have been moved from a CV queue to a
768 		 * mutex queue while we were attempting queue_lock().
769 		 * If so, just loop around and try again.
770 		 * dequeue_self() clears self->ul_sleepq.
771 		 */
772 		if (qp == self->ul_sleepq) {
773 			(void) dequeue_self(qp, self->ul_wchan);
774 			self->ul_writer = 0;
775 		}
776 		queue_unlock(qp);
777 	}
778 	self->ul_critical--;
779 }
780 
781 /*
782  * Common code for calling the ___lwp_mutex_timedlock() system call.
783  * Returns with mutex_owner and mutex_ownerpid set correctly.
784  */
785 static int
786 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
787 {
788 	ulwp_t *self = curthread;
789 	uberdata_t *udp = self->ul_uberdata;
790 	int mtype = mp->mutex_type;
791 	hrtime_t begin_sleep;
792 	int acquired;
793 	int error;
794 
795 	self->ul_sp = stkptr();
796 	self->ul_wchan = mp;
797 	if (__td_event_report(self, TD_SLEEP, udp)) {
798 		self->ul_td_evbuf.eventnum = TD_SLEEP;
799 		self->ul_td_evbuf.eventdata = mp;
800 		tdb_event(TD_SLEEP, udp);
801 	}
802 	if (msp) {
803 		tdb_incr(msp->mutex_sleep);
804 		begin_sleep = gethrtime();
805 	}
806 
807 	DTRACE_PROBE1(plockstat, mutex__block, mp);
808 
809 	for (;;) {
810 		/*
811 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
812 		 * means we successfully acquired the lock.
813 		 */
814 		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
815 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
816 			acquired = 0;
817 			break;
818 		}
819 
820 		if (mtype & USYNC_PROCESS) {
821 			/*
822 			 * Defend against forkall().  We may be the child,
823 			 * in which case we don't actually own the mutex.
824 			 */
825 			enter_critical(self);
826 			if (mp->mutex_ownerpid == udp->pid) {
827 				mp->mutex_owner = (uintptr_t)self;
828 				exit_critical(self);
829 				acquired = 1;
830 				break;
831 			}
832 			exit_critical(self);
833 		} else {
834 			mp->mutex_owner = (uintptr_t)self;
835 			acquired = 1;
836 			break;
837 		}
838 	}
839 	if (msp)
840 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
841 	self->ul_wchan = NULL;
842 	self->ul_sp = 0;
843 
844 	if (acquired) {
845 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
846 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
847 	} else {
848 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
849 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
850 	}
851 
852 	return (error);
853 }
854 
855 /*
856  * Common code for calling the ___lwp_mutex_trylock() system call.
857  * Returns with mutex_owner and mutex_ownerpid set correctly.
858  */
859 int
860 mutex_trylock_kernel(mutex_t *mp)
861 {
862 	ulwp_t *self = curthread;
863 	uberdata_t *udp = self->ul_uberdata;
864 	int mtype = mp->mutex_type;
865 	int error;
866 	int acquired;
867 
868 	for (;;) {
869 		/*
870 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
871 		 * means we successfully acquired the lock.
872 		 */
873 		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
874 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
875 			acquired = 0;
876 			break;
877 		}
878 
879 		if (mtype & USYNC_PROCESS) {
880 			/*
881 			 * Defend against forkall().  We may be the child,
882 			 * in which case we don't actually own the mutex.
883 			 */
884 			enter_critical(self);
885 			if (mp->mutex_ownerpid == udp->pid) {
886 				mp->mutex_owner = (uintptr_t)self;
887 				exit_critical(self);
888 				acquired = 1;
889 				break;
890 			}
891 			exit_critical(self);
892 		} else {
893 			mp->mutex_owner = (uintptr_t)self;
894 			acquired = 1;
895 			break;
896 		}
897 	}
898 
899 	if (acquired) {
900 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
901 	} else if (error != EBUSY) {
902 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
903 	}
904 
905 	return (error);
906 }
907 
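/*
 * Set up the schedctl shared data for the calling thread on first use.
 * Returns NULL if the data cannot be set up, for example in a child
 * of vfork() or if the __schedctl() system call fails.
 */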
908 volatile sc_shared_t *
909 setup_schedctl(void)
910 {
911 	ulwp_t *self = curthread;
912 	volatile sc_shared_t *scp;
913 	sc_shared_t *tmp;
914 
915 	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
916 	    !self->ul_vfork &&			/* not a child of vfork() */
917 	    !self->ul_schedctl_called) {	/* haven't been called before */
918 		enter_critical(self);
919 		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
920 		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
921 			self->ul_schedctl = scp = tmp;
922 		exit_critical(self);
923 	}
924 	/*
925 	 * Unless the call to setup_schedctl() is surrounded
926 	 * by enter_critical()/exit_critical(), the address
927 	 * we are returning could be invalid due to a forkall()
928 	 * having occurred in another thread.
929 	 */
930 	return (scp);
931 }
932 
933 /*
934  * Interfaces from libsched, incorporated into libc.
935  * libsched.so.1 is now a filter library onto libc.
936  */
937 #pragma weak schedctl_lookup = _schedctl_init
938 #pragma weak _schedctl_lookup = _schedctl_init
939 #pragma weak schedctl_init = _schedctl_init
940 schedctl_t *
941 _schedctl_init(void)
942 {
943 	volatile sc_shared_t *scp = setup_schedctl();
944 	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
945 }
946 
947 #pragma weak schedctl_exit = _schedctl_exit
948 void
949 _schedctl_exit(void)
950 {
951 }
952 
953 /*
954  * Contract private interface for java.
955  * Set up the schedctl data if it doesn't exist yet.
956  * Return a pointer to the pointer to the schedctl data.
957  */
958 volatile sc_shared_t *volatile *
959 _thr_schedctl(void)
960 {
961 	ulwp_t *self = curthread;
962 	volatile sc_shared_t *volatile *ptr;
963 
964 	if (self->ul_vfork)
965 		return (NULL);
966 	if (*(ptr = &self->ul_schedctl) == NULL)
967 		(void) setup_schedctl();
968 	return (ptr);
969 }
970 
971 /*
972  * Block signals and attempt to block preemption.
973  * no_preempt()/preempt() must be used in pairs but can be nested.
974  */
975 void
976 no_preempt(ulwp_t *self)
977 {
978 	volatile sc_shared_t *scp;
979 
980 	if (self->ul_preempt++ == 0) {
981 		enter_critical(self);
982 		if ((scp = self->ul_schedctl) != NULL ||
983 		    (scp = setup_schedctl()) != NULL) {
984 			/*
985 			 * Save the pre-existing preempt value.
986 			 */
987 			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
988 			scp->sc_preemptctl.sc_nopreempt = 1;
989 		}
990 	}
991 }
992 
993 /*
994  * Undo the effects of no_preempt().
995  */
996 void
997 preempt(ulwp_t *self)
998 {
999 	volatile sc_shared_t *scp;
1000 
1001 	ASSERT(self->ul_preempt > 0);
1002 	if (--self->ul_preempt == 0) {
1003 		if ((scp = self->ul_schedctl) != NULL) {
1004 			/*
1005 			 * Restore the pre-existing preempt value.
1006 			 */
1007 			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
1008 			if (scp->sc_preemptctl.sc_yield &&
1009 			    scp->sc_preemptctl.sc_nopreempt == 0) {
1010 				lwp_yield();
1011 				if (scp->sc_preemptctl.sc_yield) {
1012 					/*
1013 					 * Shouldn't happen.  This is either
1014 					 * a race condition or the thread
1015 					 * just entered the real-time class.
1016 					 */
1017 					lwp_yield();
1018 					scp->sc_preemptctl.sc_yield = 0;
1019 				}
1020 			}
1021 		}
1022 		exit_critical(self);
1023 	}
1024 }
1025 
1026 /*
1027  * If a call to preempt() would cause the current thread to yield or to
1028  * take deferred actions in exit_critical(), then unpark the specified
1029  * lwp so it can run while we delay.  Return the original lwpid if the
1030  * unpark was not performed, else return zero.  The tests are a repeat
1031  * of some of the tests in preempt(), above.  This is a statistical
1032  * optimization solely for cond_sleep_queue(), below.
1033  */
1034 static lwpid_t
1035 preempt_unpark(ulwp_t *self, lwpid_t lwpid)
1036 {
1037 	volatile sc_shared_t *scp = self->ul_schedctl;
1038 
1039 	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
1040 	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
1041 	    (self->ul_curplease && self->ul_critical == 1)) {
1042 		(void) __lwp_unpark(lwpid);
1043 		lwpid = 0;
1044 	}
1045 	return (lwpid);
1046 }
1047 
1048 /*
1049  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
1050  * If this fails, return EBUSY and let the caller deal with it.
1051  * If this succeeds, return 0 with mutex_owner set to curthread.
1052  */
1053 static int
1054 mutex_trylock_adaptive(mutex_t *mp, int tryhard)
1055 {
1056 	ulwp_t *self = curthread;
1057 	int error = EBUSY;
1058 	ulwp_t *ulwp;
1059 	volatile sc_shared_t *scp;
1060 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
1061 	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
1062 	uint32_t new_lockword;
1063 	int count = 0;
1064 	int max_count;
1065 	uint8_t max_spinners;
1066 
1067 	ASSERT(!(mp->mutex_type & USYNC_PROCESS));
1068 
1069 	if (MUTEX_OWNER(mp) == self)
1070 		return (EBUSY);
1071 
1072 	/* short-cut, not definitive (see below) */
1073 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
1074 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1075 		error = ENOTRECOVERABLE;
1076 		goto done;
1077 	}
1078 
1079 	/*
1080 	 * Make one attempt to acquire the lock before
1081 	 * incurring the overhead of the spin loop.
1082 	 */
1083 	if (set_lock_byte(lockp) == 0) {
1084 		*ownerp = (uintptr_t)self;
1085 		error = 0;
1086 		goto done;
1087 	}
1088 	if (!tryhard)
1089 		goto done;
1090 	if (ncpus == 0)
1091 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
1092 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
1093 		max_spinners = ncpus - 1;
1094 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
1095 	if (max_count == 0)
1096 		goto done;
1097 
1098 	/*
1099 	 * This spin loop is unfair to lwps that have already dropped into
1100 	 * the kernel to sleep.  They will starve on a highly-contended mutex.
1101 	 * This is just too bad.  The adaptive spin algorithm is intended
1102 	 * to allow programs with highly-contended locks (that is, broken
1103 	 * programs) to execute with reasonable speed despite their contention.
1104 	 * Being fair would reduce the speed of such programs and well-written
1105 	 * programs will not suffer in any case.
1106 	 */
1107 	enter_critical(self);
1108 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
1109 		exit_critical(self);
1110 		goto done;
1111 	}
1112 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
1113 	for (count = 1; ; count++) {
1114 		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
1115 			*ownerp = (uintptr_t)self;
1116 			error = 0;
1117 			break;
1118 		}
1119 		if (count == max_count)
1120 			break;
1121 		SMT_PAUSE();
1122 		/*
1123 		 * Stop spinning if the mutex owner is not running on
1124 		 * a processor; it will not drop the lock any time soon
1125 		 * and we would just be wasting time to keep spinning.
1126 		 *
1127 		 * Note that we are looking at another thread (ulwp_t)
1128 		 * without ensuring that the other thread does not exit.
1129 		 * The scheme relies on ulwp_t structures never being
1130 		 * deallocated by the library (the library employs a free
1131 		 * list of ulwp_t structs that are reused when new threads
1132 		 * are created) and on schedctl shared memory never being
1133 		 * deallocated once created via __schedctl().
1134 		 *
1135 		 * Thus, the worst that can happen when the spinning thread
1136 		 * looks at the owner's schedctl data is that it is looking
1137 		 * at some other thread's schedctl data.  This almost never
1138 		 * happens and is benign when it does.
1139 		 */
1140 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1141 		    ((scp = ulwp->ul_schedctl) == NULL ||
1142 		    scp->sc_state != SC_ONPROC))
1143 			break;
1144 	}
1145 	new_lockword = spinners_decr(&mp->mutex_lockword);
1146 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
1147 		/*
1148 		 * We haven't yet acquired the lock, the lock
1149 		 * is free, and there are no other spinners.
1150 		 * Make one final attempt to acquire the lock.
1151 		 *
1152 		 * This isn't strictly necessary since mutex_lock_queue()
1153 		 * (the next action this thread will take if it doesn't
1154 		 * acquire the lock here) makes one attempt to acquire
1155 		 * the lock before putting the thread to sleep.
1156 		 *
1157 		 * If the next action for this thread (on failure here)
1158 		 * were not to call mutex_lock_queue(), this would be
1159 		 * necessary for correctness, to avoid ending up with an
1160 		 * unheld mutex with waiters but no one to wake them up.
1161 		 */
1162 		if (set_lock_byte(lockp) == 0) {
1163 			*ownerp = (uintptr_t)self;
1164 			error = 0;
1165 		}
1166 		count++;
1167 	}
1168 	exit_critical(self);
1169 
1170 done:
1171 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1172 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1173 		/*
1174 		 * We shouldn't own the mutex; clear the lock.
1175 		 */
1176 		mp->mutex_owner = 0;
1177 		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK)
1178 			mutex_wakeup_all(mp);
1179 		error = ENOTRECOVERABLE;
1180 	}
1181 
1182 	if (error) {
1183 		if (count) {
1184 			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
1185 		}
1186 		if (error != EBUSY) {
1187 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1188 		}
1189 	} else {
1190 		if (count) {
1191 			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
1192 		}
1193 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
1194 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
1195 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1196 			error = EOWNERDEAD;
1197 		}
1198 	}
1199 
1200 	return (error);
1201 }
1202 
1203 /*
1204  * Same as mutex_trylock_adaptive(), except specifically for queue locks.
1205  * The owner field is not set here; the caller (spin_lock_set()) sets it.
1206  */
1207 static int
1208 mutex_queuelock_adaptive(mutex_t *mp)
1209 {
1210 	ulwp_t *ulwp;
1211 	volatile sc_shared_t *scp;
1212 	volatile uint8_t *lockp;
1213 	volatile uint64_t *ownerp;
1214 	int count = curthread->ul_queue_spin;
1215 
1216 	ASSERT(mp->mutex_type == USYNC_THREAD);
1217 
1218 	if (count == 0)
1219 		return (EBUSY);
1220 
1221 	lockp = (volatile uint8_t *)&mp->mutex_lockw;
1222 	ownerp = (volatile uint64_t *)&mp->mutex_owner;
1223 	while (--count >= 0) {
1224 		if (*lockp == 0 && set_lock_byte(lockp) == 0)
1225 			return (0);
1226 		SMT_PAUSE();
1227 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1228 		    ((scp = ulwp->ul_schedctl) == NULL ||
1229 		    scp->sc_state != SC_ONPROC))
1230 			break;
1231 	}
1232 
1233 	return (EBUSY);
1234 }
1235 
1236 /*
1237  * Like mutex_trylock_adaptive(), but for process-shared mutexes.
1238  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
1239  * If this fails, return EBUSY and let the caller deal with it.
1240  * If this succeeds, return 0 with mutex_owner set to curthread
1241  * and mutex_ownerpid set to the current pid.
1242  */
1243 static int
1244 mutex_trylock_process(mutex_t *mp, int tryhard)
1245 {
1246 	ulwp_t *self = curthread;
1247 	uberdata_t *udp = self->ul_uberdata;
1248 	int error = EBUSY;
1249 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
1250 	uint32_t new_lockword;
1251 	int count = 0;
1252 	int max_count;
1253 	uint8_t max_spinners;
1254 
1255 	ASSERT(mp->mutex_type & USYNC_PROCESS);
1256 
1257 	if (shared_mutex_held(mp))
1258 		return (EBUSY);
1259 
1260 	/* short-cut, not definitive (see below) */
1261 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
1262 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1263 		error = ENOTRECOVERABLE;
1264 		goto done;
1265 	}
1266 
1267 	/*
1268 	 * Make one attempt to acquire the lock before
1269 	 * incurring the overhead of the spin loop.
1270 	 */
1271 	enter_critical(self);
1272 	if (set_lock_byte(lockp) == 0) {
1273 		mp->mutex_owner = (uintptr_t)self;
1274 		mp->mutex_ownerpid = udp->pid;
1275 		exit_critical(self);
1276 		error = 0;
1277 		goto done;
1278 	}
1279 	exit_critical(self);
1280 	if (!tryhard)
1281 		goto done;
1282 	if (ncpus == 0)
1283 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
1284 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
1285 		max_spinners = ncpus - 1;
1286 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
1287 	if (max_count == 0)
1288 		goto done;
1289 
1290 	/*
1291 	 * This is a process-shared mutex.
1292 	 * We cannot know if the owner is running on a processor.
1293 	 * We just spin and hope that it is on a processor.
1294 	 */
1295 	enter_critical(self);
1296 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
1297 		exit_critical(self);
1298 		goto done;
1299 	}
1300 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
1301 	for (count = 1; ; count++) {
1302 		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
1303 			mp->mutex_owner = (uintptr_t)self;
1304 			mp->mutex_ownerpid = udp->pid;
1305 			error = 0;
1306 			break;
1307 		}
1308 		if (count == max_count)
1309 			break;
1310 		SMT_PAUSE();
1311 	}
1312 	new_lockword = spinners_decr(&mp->mutex_lockword);
1313 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
1314 		/*
1315 		 * We haven't yet acquired the lock, the lock
1316 		 * is free, and there are no other spinners.
1317 		 * Make one final attempt to acquire the lock.
1318 		 *
1319 		 * This isn't strictly necessary since mutex_lock_kernel()
1320 		 * (the next action this thread will take if it doesn't
1321 		 * acquire the lock here) makes one attempt to acquire
1322 		 * the lock before putting the thread to sleep.
1323 		 *
1324 		 * If the next action for this thread (on failure here)
1325 		 * were not to call mutex_lock_kernel(), this would be
1326 		 * necessary for correctness, to avoid ending up with an
1327 		 * unheld mutex with waiters but no one to wake them up.
1328 		 */
1329 		if (set_lock_byte(lockp) == 0) {
1330 			mp->mutex_owner = (uintptr_t)self;
1331 			mp->mutex_ownerpid = udp->pid;
1332 			error = 0;
1333 		}
1334 		count++;
1335 	}
1336 	exit_critical(self);
1337 
1338 done:
1339 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1340 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1341 		/*
1342 		 * We shouldn't own the mutex; clear the lock.
1343 		 */
1344 		mp->mutex_owner = 0;
1345 		mp->mutex_ownerpid = 0;
1346 		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) {
1347 			no_preempt(self);
1348 			(void) ___lwp_mutex_wakeup(mp, 1);
1349 			preempt(self);
1350 		}
1351 		error = ENOTRECOVERABLE;
1352 	}
1353 
1354 	if (error) {
1355 		if (count) {
1356 			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
1357 		}
1358 		if (error != EBUSY) {
1359 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1360 		}
1361 	} else {
1362 		if (count) {
1363 			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
1364 		}
1365 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
1366 		if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
1367 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1368 			if (mp->mutex_flag & LOCK_OWNERDEAD)
1369 				error = EOWNERDEAD;
1370 			else if (mp->mutex_type & USYNC_PROCESS_ROBUST)
1371 				error = ELOCKUNMAPPED;
1372 			else
1373 				error = EOWNERDEAD;
1374 		}
1375 	}
1376 
1377 	return (error);
1378 }
1379 
1380 /*
1381  * Mutex wakeup code for releasing a USYNC_THREAD mutex.
1382  * Returns the lwpid of the thread that was dequeued, if any.
1383  * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
1384  * to wake up the specified lwp.
1385  */
1386 static lwpid_t
1387 mutex_wakeup(mutex_t *mp)
1388 {
1389 	lwpid_t lwpid = 0;
1390 	queue_head_t *qp;
1391 	ulwp_t *ulwp;
1392 	int more;
1393 
1394 	/*
1395 	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
1396 	 * waiters bit if no one was found on the queue because the mutex
1397 	 * might have been deallocated or reallocated for another purpose.
1398 	 */
1399 	qp = queue_lock(mp, MX);
1400 	if ((ulwp = dequeue(qp, mp, &more)) != NULL) {
1401 		lwpid = ulwp->ul_lwpid;
1402 		mp->mutex_waiters = (more? 1 : 0);
1403 	}
1404 	queue_unlock(qp);
1405 	return (lwpid);
1406 }
1407 
1408 /*
1409  * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex.
1410  */
1411 static void
1412 mutex_wakeup_all(mutex_t *mp)
1413 {
1414 	queue_head_t *qp;
1415 	int nlwpid = 0;
1416 	int maxlwps = MAXLWPS;
1417 	ulwp_t **ulwpp;
1418 	ulwp_t *ulwp;
1419 	ulwp_t *prev = NULL;
1420 	lwpid_t buffer[MAXLWPS];
1421 	lwpid_t *lwpid = buffer;
1422 
1423 	/*
1424 	 * Walk the list of waiters and prepare to wake up all of them.
1425 	 * The waiters flag has already been cleared from the mutex.
1426 	 *
1427 	 * We keep track of lwpids that are to be unparked in lwpid[].
1428 	 * __lwp_unpark_all() is called to unpark all of them after
1429 	 * they have been removed from the sleep queue and the sleep
1430 	 * queue lock has been dropped.  If we run out of space in our
1431 	 * on-stack buffer, we need to allocate more but we can't call
1432 	 * lmalloc() because we are holding a queue lock when the overflow
1433 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
1434 	 * either because the application may have allocated a small
1435 	 * stack and we don't want to overrun it.  So we call
1436 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
1437 	 * system call directly since that path acquires no locks.
1438 	 */
1439 	qp = queue_lock(mp, MX);
1440 	ulwpp = &qp->qh_head;
1441 	while ((ulwp = *ulwpp) != NULL) {
1442 		if (ulwp->ul_wchan != mp) {
1443 			prev = ulwp;
1444 			ulwpp = &ulwp->ul_link;
1445 		} else {
1446 			if (nlwpid == maxlwps)
1447 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
1448 			(void) queue_unlink(qp, ulwpp, prev);
1449 			lwpid[nlwpid++] = ulwp->ul_lwpid;
1450 		}
1451 	}
1452 
1453 	if (nlwpid == 0) {
1454 		queue_unlock(qp);
1455 	} else {
1456 		mp->mutex_waiters = 0;
1457 		no_preempt(curthread);
1458 		queue_unlock(qp);
1459 		if (nlwpid == 1)
1460 			(void) __lwp_unpark(lwpid[0]);
1461 		else
1462 			(void) __lwp_unpark_all(lwpid, nlwpid);
1463 		preempt(curthread);
1464 	}
1465 
1466 	if (lwpid != buffer)
1467 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
1468 }
1469 
1470 /*
1471  * Release a process-private mutex.
1472  * As an optimization, if there are waiters but there are also spinners
1473  * attempting to acquire the mutex, then don't bother waking up a waiter;
1474  * one of the spinners will acquire the mutex soon and it would be a waste
1475  * of resources to wake up some thread just to have it spin for a while
1476  * and then possibly go back to sleep.  See mutex_trylock_adaptive().
1477  */
1478 static lwpid_t
1479 mutex_unlock_queue(mutex_t *mp, int release_all)
1480 {
1481 	lwpid_t lwpid = 0;
1482 	uint32_t old_lockword;
1483 
1484 	mp->mutex_owner = 0;
1485 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1486 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
1487 	if ((old_lockword & WAITERMASK) &&
1488 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
1489 		ulwp_t *self = curthread;
1490 		no_preempt(self);	/* ensure a prompt wakeup */
1491 		if (release_all)
1492 			mutex_wakeup_all(mp);
1493 		else
1494 			lwpid = mutex_wakeup(mp);
1495 		if (lwpid == 0)
1496 			preempt(self);
1497 	}
1498 	return (lwpid);
1499 }
1500 
1501 /*
1502  * Like mutex_unlock_queue(), but for process-shared mutexes.
1503  */
1504 static void
1505 mutex_unlock_process(mutex_t *mp, int release_all)
1506 {
1507 	uint32_t old_lockword;
1508 
1509 	mp->mutex_owner = 0;
1510 	mp->mutex_ownerpid = 0;
1511 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1512 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
1513 	if ((old_lockword & WAITERMASK) &&
1514 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
1515 		ulwp_t *self = curthread;
1516 		no_preempt(self);	/* ensure a prompt wakeup */
1517 		(void) ___lwp_mutex_wakeup(mp, release_all);
1518 		preempt(self);
1519 	}
1520 }
1521 
1522 /*
1523  * Return the real priority of a thread.
1524  */
1525 int
1526 real_priority(ulwp_t *ulwp)
1527 {
1528 	if (ulwp->ul_epri == 0)
1529 		return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri);
1530 	return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri);
1531 }
1532 
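/*
 * Block forever.  stall_mutex is held by lwp#1 (see mutex_setup() and
 * the comment at its definition at the top of this file), so the loop
 * below just blocks repeatedly in the kernel.
 */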
1533 void
1534 stall(void)
1535 {
1536 	for (;;)
1537 		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
1538 }
1539 
1540 /*
1541  * Acquire a USYNC_THREAD mutex via user-level sleep queues.
1542  * We come here only after set_lock_byte(&mp->mutex_lockw) has failed.
1543  * If successful, returns with mutex_owner set correctly.
1544  */
1545 int
1546 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
1547 	timespec_t *tsp)
1548 {
1549 	uberdata_t *udp = curthread->ul_uberdata;
1550 	queue_head_t *qp;
1551 	hrtime_t begin_sleep;
1552 	int error = 0;
1553 
1554 	self->ul_sp = stkptr();
1555 	if (__td_event_report(self, TD_SLEEP, udp)) {
1556 		self->ul_wchan = mp;
1557 		self->ul_td_evbuf.eventnum = TD_SLEEP;
1558 		self->ul_td_evbuf.eventdata = mp;
1559 		tdb_event(TD_SLEEP, udp);
1560 	}
1561 	if (msp) {
1562 		tdb_incr(msp->mutex_sleep);
1563 		begin_sleep = gethrtime();
1564 	}
1565 
1566 	DTRACE_PROBE1(plockstat, mutex__block, mp);
1567 
1568 	/*
1569 	 * Put ourself on the sleep queue, and while we are
1570 	 * unable to grab the lock, go park in the kernel.
1571 	 * Take ourself off the sleep queue after we acquire the lock.
1572 	 * The waiter bit can be set/cleared only while holding the queue lock.
1573 	 */
1574 	qp = queue_lock(mp, MX);
1575 	enqueue(qp, self, mp, MX);
1576 	mp->mutex_waiters = 1;
1577 	for (;;) {
1578 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1579 			mp->mutex_owner = (uintptr_t)self;
1580 			mp->mutex_waiters = dequeue_self(qp, mp);
1581 			break;
1582 		}
1583 		set_parking_flag(self, 1);
1584 		queue_unlock(qp);
1585 		/*
1586 		 * __lwp_park() will return the residual time in tsp
1587 		 * if we are unparked before the timeout expires.
1588 		 */
1589 		error = __lwp_park(tsp, 0);
1590 		set_parking_flag(self, 0);
1591 		/*
1592 		 * We could have taken a signal or suspended ourself.
1593 		 * If we did, then we removed ourself from the queue.
1594 		 * Someone else may have removed us from the queue
1595 		 * as a consequence of mutex_unlock().  We may have
1596 		 * gotten a timeout from __lwp_park().  Or we may still
1597 		 * be on the queue and this is just a spurious wakeup.
1598 		 */
1599 		qp = queue_lock(mp, MX);
1600 		if (self->ul_sleepq == NULL) {
1601 			if (error) {
1602 				mp->mutex_waiters = queue_waiter(qp, mp)? 1 : 0;
1603 				if (error != EINTR)
1604 					break;
1605 				error = 0;
1606 			}
1607 			if (set_lock_byte(&mp->mutex_lockw) == 0) {
1608 				mp->mutex_owner = (uintptr_t)self;
1609 				break;
1610 			}
1611 			enqueue(qp, self, mp, MX);
1612 			mp->mutex_waiters = 1;
1613 		}
1614 		ASSERT(self->ul_sleepq == qp &&
1615 		    self->ul_qtype == MX &&
1616 		    self->ul_wchan == mp);
1617 		if (error) {
1618 			if (error != EINTR) {
1619 				mp->mutex_waiters = dequeue_self(qp, mp);
1620 				break;
1621 			}
1622 			error = 0;
1623 		}
1624 	}
1625 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
1626 	    self->ul_wchan == NULL);
1627 	self->ul_sp = 0;
1628 	queue_unlock(qp);
1629 
1630 	if (msp)
1631 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
1632 
1633 	ASSERT(error == 0 || error == EINVAL || error == ETIME);
1634 
1635 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1636 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1637 		/*
1638 		 * We shouldn't own the mutex; clear the lock.
1639 		 */
1640 		mp->mutex_owner = 0;
1641 		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK)
1642 			mutex_wakeup_all(mp);
1643 		error = ENOTRECOVERABLE;
1644 	}
1645 
1646 	if (error) {
1647 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
1648 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1649 	} else {
1650 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
1651 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1652 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
1653 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1654 			error = EOWNERDEAD;
1655 		}
1656 	}
1657 
1658 	return (error);
1659 }
1660 
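/*
 * Handle an attempt by the owner to reacquire a LOCK_RECURSIVE or
 * LOCK_ERRORCHECK mutex: bump the recursion count for a recursive
 * mutex (failing with EAGAIN at RECURSION_MAX); otherwise return
 * EDEADLK for mutex_lock() or EBUSY for mutex_trylock().
 */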
1661 static int
1662 mutex_recursion(mutex_t *mp, int mtype, int try)
1663 {
1664 	ASSERT(mutex_is_held(mp));
1665 	ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK));
1666 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1667 
1668 	if (mtype & LOCK_RECURSIVE) {
1669 		if (mp->mutex_rcount == RECURSION_MAX) {
1670 			DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN);
1671 			return (EAGAIN);
1672 		}
1673 		mp->mutex_rcount++;
1674 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
1675 		return (0);
1676 	}
1677 	if (try == MUTEX_LOCK) {
1678 		DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1679 		return (EDEADLK);
1680 	}
1681 	return (EBUSY);
1682 }
1683 
1684 /*
1685  * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so
1686  * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary.
1687  * We use tdb_hash_lock here and in the synch object tracking code in
1688  * the tdb_agent.c file.  There is no conflict between these two usages.
1689  */
1690 void
1691 register_lock(mutex_t *mp)
1692 {
1693 	uberdata_t *udp = curthread->ul_uberdata;
1694 	uint_t hash = LOCK_HASH(mp);
1695 	robust_t *rlp;
1696 	robust_t **rlpp;
1697 	robust_t **table;
1698 
1699 	if ((table = udp->robustlocks) == NULL) {
1700 		lmutex_lock(&udp->tdb_hash_lock);
1701 		if ((table = udp->robustlocks) == NULL) {
1702 			table = lmalloc(LOCKHASHSZ * sizeof (robust_t *));
1703 			_membar_producer();
1704 			udp->robustlocks = table;
1705 		}
1706 		lmutex_unlock(&udp->tdb_hash_lock);
1707 	}
1708 	_membar_consumer();
1709 
1710 	/*
1711 	 * First search the registered table with no locks held.
1712 	 * This is safe because the table never shrinks
1713 	 * and we can only get a false negative.
1714 	 */
1715 	for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) {
1716 		if (rlp->robust_lock == mp)	/* already registered */
1717 			return;
1718 	}
1719 
1720 	/*
1721 	 * The lock was not found.
1722 	 * Repeat the operation with tdb_hash_lock held.
1723 	 */
1724 	lmutex_lock(&udp->tdb_hash_lock);
1725 
1726 	for (rlpp = &table[hash];
1727 	    (rlp = *rlpp) != NULL;
1728 	    rlpp = &rlp->robust_next) {
1729 		if (rlp->robust_lock == mp) {	/* already registered */
1730 			lmutex_unlock(&udp->tdb_hash_lock);
1731 			return;
1732 		}
1733 	}
1734 
1735 	/*
1736 	 * The lock has never been registered.
1737 	 * Register it now and add it to the table.
1738 	 */
1739 	(void) ___lwp_mutex_register(mp);
1740 	rlp = lmalloc(sizeof (*rlp));
1741 	rlp->robust_lock = mp;
1742 	_membar_producer();
1743 	*rlpp = rlp;
1744 
1745 	lmutex_unlock(&udp->tdb_hash_lock);
1746 }
1747 
1748 /*
1749  * This is called in the child of fork()/forkall() to start over
1750  * with a clean slate.  (Each process must register its own locks.)
1751  * No locks are needed because all other threads are suspended or gone.
1752  */
1753 void
1754 unregister_locks(void)
1755 {
1756 	uberdata_t *udp = curthread->ul_uberdata;
1757 	uint_t hash;
1758 	robust_t **table;
1759 	robust_t *rlp;
1760 	robust_t *next;
1761 
1762 	if ((table = udp->robustlocks) != NULL) {
1763 		for (hash = 0; hash < LOCKHASHSZ; hash++) {
1764 			rlp = table[hash];
1765 			while (rlp != NULL) {
1766 				next = rlp->robust_next;
1767 				lfree(rlp, sizeof (*rlp));
1768 				rlp = next;
1769 			}
1770 		}
1771 		lfree(table, LOCKHASHSZ * sizeof (robust_t *));
1772 		udp->robustlocks = NULL;
1773 	}
1774 }
1775 
1776 /*
1777  * Returns with mutex_owner set correctly.
1778  */
1779 static int
1780 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
1781 {
1782 	ulwp_t *self = curthread;
1783 	uberdata_t *udp = self->ul_uberdata;
1784 	int mtype = mp->mutex_type;
1785 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
1786 	int error = 0;
1787 	uint8_t ceil;
1788 	int myprio;
1789 
1790 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1791 
1792 	if (!self->ul_schedctl_called)
1793 		(void) setup_schedctl();
1794 
1795 	if (msp && try == MUTEX_TRY)
1796 		tdb_incr(msp->mutex_try);
1797 
1798 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp))
1799 		return (mutex_recursion(mp, mtype, try));
1800 
1801 	if (self->ul_error_detection && try == MUTEX_LOCK &&
1802 	    tsp == NULL && mutex_is_held(mp))
1803 		lock_error(mp, "mutex_lock", NULL, NULL);
1804 
1805 	if (mtype & LOCK_PRIO_PROTECT) {
1806 		ceil = mp->mutex_ceiling;
1807 		ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0);
1808 		myprio = real_priority(self);
1809 		if (myprio > ceil) {
1810 			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
1811 			return (EINVAL);
1812 		}
1813 		if ((error = _ceil_mylist_add(mp)) != 0) {
1814 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1815 			return (error);
1816 		}
1817 		if (myprio < ceil)
1818 			_ceil_prio_inherit(ceil);
1819 	}
1820 
1821 	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
1822 	    == (USYNC_PROCESS | LOCK_ROBUST))
1823 		register_lock(mp);
1824 
1825 	if (mtype & LOCK_PRIO_INHERIT) {
1826 		/* go straight to the kernel */
1827 		if (try == MUTEX_TRY)
1828 			error = mutex_trylock_kernel(mp);
1829 		else	/* MUTEX_LOCK */
1830 			error = mutex_lock_kernel(mp, tsp, msp);
1831 		/*
1832 		 * The kernel never sets or clears the lock byte
1833 		 * for LOCK_PRIO_INHERIT mutexes.
1834 		 * Set it here for consistency.
1835 		 */
1836 		switch (error) {
1837 		case 0:
1838 			mp->mutex_lockw = LOCKSET;
1839 			break;
1840 		case EOWNERDEAD:
1841 		case ELOCKUNMAPPED:
1842 			mp->mutex_lockw = LOCKSET;
1843 			/* FALLTHROUGH */
1844 		case ENOTRECOVERABLE:
1845 			ASSERT(mtype & LOCK_ROBUST);
1846 			break;
1847 		case EDEADLK:
1848 			if (try == MUTEX_LOCK)
1849 				stall();
1850 			error = EBUSY;
1851 			break;
1852 		}
1853 	} else if (mtype & USYNC_PROCESS) {
1854 		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
1855 		if (error == EBUSY && try == MUTEX_LOCK)
1856 			error = mutex_lock_kernel(mp, tsp, msp);
1857 	} else {	/* USYNC_THREAD */
1858 		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
1859 		if (error == EBUSY && try == MUTEX_LOCK)
1860 			error = mutex_lock_queue(self, msp, mp, tsp);
1861 	}
1862 
1863 	switch (error) {
1864 	case 0:
1865 	case EOWNERDEAD:
1866 	case ELOCKUNMAPPED:
1867 		if (mtype & LOCK_ROBUST)
1868 			remember_lock(mp);
1869 		if (msp)
1870 			record_begin_hold(msp);
1871 		break;
1872 	default:
1873 		if (mtype & LOCK_PRIO_PROTECT) {
1874 			(void) _ceil_mylist_del(mp);
1875 			if (myprio < ceil)
1876 				_ceil_prio_waive();
1877 		}
1878 		if (try == MUTEX_TRY) {
1879 			if (msp)
1880 				tdb_incr(msp->mutex_try_fail);
1881 			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1882 				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1883 				tdb_event(TD_LOCK_TRY, udp);
1884 			}
1885 		}
1886 		break;
1887 	}
1888 
1889 	return (error);
1890 }
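
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): creating a priority-ceiling mutex that exercises the
 * LOCK_PRIO_PROTECT path in mutex_lock_internal() above.  The ceiling
 * must be a valid SCHED_FIFO priority; a caller whose priority exceeds
 * the ceiling gets EINVAL back from the lock call, as coded above.
 * The helper name init_ceiling_mutex() is hypothetical.
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t ceil_lock;
 *
 *	int
 *	init_ceiling_mutex(int ceiling)
 *	{
 *		pthread_mutexattr_t attr;
 *		int error;
 *
 *		(void) pthread_mutexattr_init(&attr);
 *		(void) pthread_mutexattr_setprotocol(&attr,
 *		    PTHREAD_PRIO_PROTECT);
 *		(void) pthread_mutexattr_setprioceiling(&attr, ceiling);
 *		error = pthread_mutex_init(&ceil_lock, &attr);
 *		(void) pthread_mutexattr_destroy(&attr);
 *		return (error);
 *	}
 */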
1891 
1892 int
1893 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
1894 {
1895 	ulwp_t *self = curthread;
1896 	uberdata_t *udp = self->ul_uberdata;
1897 
1898 	/*
1899 	 * We know that USYNC_PROCESS is set in mtype and that
1900 	 * zero, one, or both of the flags LOCK_RECURSIVE and
1901 	 * LOCK_ERRORCHECK are set, and that no other flags are set.
1902 	 */
1903 	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
1904 	enter_critical(self);
1905 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
1906 		mp->mutex_owner = (uintptr_t)self;
1907 		mp->mutex_ownerpid = udp->pid;
1908 		exit_critical(self);
1909 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1910 		return (0);
1911 	}
1912 	exit_critical(self);
1913 
1914 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
1915 		return (mutex_recursion(mp, mtype, try));
1916 
1917 	if (try == MUTEX_LOCK) {
1918 		if (mutex_trylock_process(mp, 1) == 0)
1919 			return (0);
1920 		return (mutex_lock_kernel(mp, tsp, NULL));
1921 	}
1922 
1923 	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1924 		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1925 		tdb_event(TD_LOCK_TRY, udp);
1926 	}
1927 	return (EBUSY);
1928 }
1929 
1930 static int
1931 mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
1932 {
1933 	ulwp_t *self = curthread;
1934 	uberdata_t *udp = self->ul_uberdata;
1935 	uberflags_t *gflags;
1936 	int mtype;
1937 
1938 	/*
1939 	 * Optimize the case of USYNC_THREAD, including
1940 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
1941 	 * no error detection, no lock statistics,
1942 	 * and the process has only a single thread.
1943 	 * (Most likely a traditional single-threaded application.)
1944 	 */
1945 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
1946 	    udp->uberflags.uf_all) == 0) {
1947 		/*
1948 		 * Only one thread exists so we don't need an atomic operation.
1949 		 */
1950 		if (mp->mutex_lockw == 0) {
1951 			mp->mutex_lockw = LOCKSET;
1952 			mp->mutex_owner = (uintptr_t)self;
1953 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1954 			return (0);
1955 		}
1956 		if (mtype && MUTEX_OWNER(mp) == self)
1957 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
1958 		/*
1959 		 * We have reached a deadlock, probably because the
1960 		 * process is executing non-async-signal-safe code in
1961 		 * a signal handler and is attempting to acquire a lock
1962 		 * that it already owns.  This is not surprising, given
1963 		 * bad programming practices over the years that has
1964 		 * bad programming practices over the years that have
1965 		 * in their signal handlers.  Unless the user has told
1966 		 * us that the signal handlers are safe by setting:
1967 		 *	export _THREAD_ASYNC_SAFE=1
1968 		 * we return EDEADLK rather than actually deadlocking.
1969 		 */
1970 		if (tsp == NULL &&
1971 		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
1972 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1973 			return (EDEADLK);
1974 		}
1975 	}
1976 
1977 	/*
1978 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
1979 	 * no error detection, and no lock statistics.
1980 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
1981 	 */
1982 	if ((gflags = self->ul_schedctl_called) != NULL &&
1983 	    (gflags->uf_trs_ted |
1984 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
1985 		if (mtype & USYNC_PROCESS)
1986 			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
1987 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1988 			mp->mutex_owner = (uintptr_t)self;
1989 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1990 			return (0);
1991 		}
1992 		if (mtype && MUTEX_OWNER(mp) == self)
1993 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
1994 		if (mutex_trylock_adaptive(mp, 1) != 0)
1995 			return (mutex_lock_queue(self, NULL, mp, tsp));
1996 		return (0);
1997 	}
1998 
1999 	/* else do it the long way */
2000 	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
2001 }
2002 
2003 /*
2004  * Of the following function names (all the same function, of course),
2005  * only _private_mutex_lock() is not exported from libc.  This means
2006  * that calling _private_mutex_lock() within libc will not invoke the
2007  * dynamic linker.  This is critical for any code called in the child
2008  * of vfork() (via posix_spawn()) because invoking the dynamic linker
2009  * in such a case would corrupt the parent's address space.  There are
2010  * other places in libc where avoiding the dynamic linker is necessary.
2011  * Of course, _private_mutex_lock() can be called in cases not requiring
2012  * the avoidance of the dynamic linker too, and often is.
2013  */
2014 #pragma weak _private_mutex_lock = __mutex_lock
2015 #pragma weak mutex_lock = __mutex_lock
2016 #pragma weak _mutex_lock = __mutex_lock
2017 #pragma weak pthread_mutex_lock = __mutex_lock
2018 #pragma weak _pthread_mutex_lock = __mutex_lock
2019 int
2020 __mutex_lock(mutex_t *mp)
2021 {
2022 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2023 	return (mutex_lock_impl(mp, NULL));
2024 }
2025 
2026 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock
2027 int
2028 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime)
2029 {
2030 	timespec_t tslocal;
2031 	int error;
2032 
2033 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2034 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
2035 	error = mutex_lock_impl(mp, &tslocal);
2036 	if (error == ETIME)
2037 		error = ETIMEDOUT;
2038 	return (error);
2039 }
2040 
2041 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np
2042 int
2043 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime)
2044 {
2045 	timespec_t tslocal;
2046 	int error;
2047 
2048 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2049 	tslocal = *reltime;
2050 	error = mutex_lock_impl(mp, &tslocal);
2051 	if (error == ETIME)
2052 		error = ETIMEDOUT;
2053 	return (error);
2054 }
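
/*
 * Illustrative sketch (editor's addition): acquiring a lock with a
 * bounded wait using the absolute-time and relative-time interfaces
 * defined above.  Both map the kernel's ETIME to the POSIX ETIMEDOUT,
 * as shown above.  Error handling is abbreviated; the five-second
 * bound is arbitrary.
 *
 *	#include <pthread.h>
 *	#include <time.h>
 *
 *	// absolute-deadline form
 *	int
 *	lock_with_deadline(pthread_mutex_t *mp)
 *	{
 *		timespec_t abstime;
 *
 *		(void) clock_gettime(CLOCK_REALTIME, &abstime);
 *		abstime.tv_sec += 5;
 *		return (pthread_mutex_timedlock(mp, &abstime));
 *	}
 *
 *	// relative-timeout form (Solaris extension)
 *	int
 *	lock_with_timeout(pthread_mutex_t *mp)
 *	{
 *		timespec_t reltime = { 5, 0 };
 *
 *		return (pthread_mutex_reltimedlock_np(mp, &reltime));
 *	}
 */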
2055 
2056 #pragma weak _private_mutex_trylock = __mutex_trylock
2057 #pragma weak mutex_trylock = __mutex_trylock
2058 #pragma weak _mutex_trylock = __mutex_trylock
2059 #pragma weak pthread_mutex_trylock = __mutex_trylock
2060 #pragma weak _pthread_mutex_trylock = __mutex_trylock
2061 int
2062 __mutex_trylock(mutex_t *mp)
2063 {
2064 	ulwp_t *self = curthread;
2065 	uberdata_t *udp = self->ul_uberdata;
2066 	uberflags_t *gflags;
2067 	int mtype;
2068 
2069 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2070 	/*
2071 	 * Optimize the case of USYNC_THREAD, including
2072 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
2073 	 * no error detection, no lock statistics,
2074 	 * and the process has only a single thread.
2075 	 * (Most likely a traditional single-threaded application.)
2076 	 */
2077 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
2078 	    udp->uberflags.uf_all) == 0) {
2079 		/*
2080 		 * Only one thread exists so we don't need an atomic operation.
2081 		 */
2082 		if (mp->mutex_lockw == 0) {
2083 			mp->mutex_lockw = LOCKSET;
2084 			mp->mutex_owner = (uintptr_t)self;
2085 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2086 			return (0);
2087 		}
2088 		if (mtype && MUTEX_OWNER(mp) == self)
2089 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
2090 		return (EBUSY);
2091 	}
2092 
2093 	/*
2094 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
2095 	 * no error detection, and no lock statistics.
2096 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
2097 	 */
2098 	if ((gflags = self->ul_schedctl_called) != NULL &&
2099 	    (gflags->uf_trs_ted |
2100 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
2101 		if (mtype & USYNC_PROCESS)
2102 			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
2103 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
2104 			mp->mutex_owner = (uintptr_t)self;
2105 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2106 			return (0);
2107 		}
2108 		if (mtype && MUTEX_OWNER(mp) == self)
2109 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
2110 		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
2111 			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
2112 			tdb_event(TD_LOCK_TRY, udp);
2113 		}
2114 		return (EBUSY);
2115 	}
2116 
2117 	/* else do it the long way */
2118 	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
2119 }
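
/*
 * Illustrative sketch (editor's addition): a typical caller of the
 * trylock interface above, doing other work instead of blocking when
 * EBUSY is returned.  The function name try_update() and the counter
 * it protects are hypothetical.
 *
 *	#include <pthread.h>
 *	#include <errno.h>
 *
 *	static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
 *	static int count;
 *
 *	int
 *	try_update(void)
 *	{
 *		if (pthread_mutex_trylock(&count_lock) == EBUSY)
 *			return (0);	// contended; caller retries later
 *		count++;
 *		(void) pthread_mutex_unlock(&count_lock);
 *		return (1);
 *	}
 */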
2120 
2121 int
2122 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
2123 {
2124 	ulwp_t *self = curthread;
2125 	uberdata_t *udp = self->ul_uberdata;
2126 	int mtype = mp->mutex_type;
2127 	tdb_mutex_stats_t *msp;
2128 	int error = 0;
2129 	int release_all;
2130 	lwpid_t lwpid;
2131 
2132 	if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp))
2133 		return (EPERM);
2134 
2135 	if (self->ul_error_detection && !mutex_is_held(mp))
2136 		lock_error(mp, "mutex_unlock", NULL, NULL);
2137 
2138 	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2139 		mp->mutex_rcount--;
2140 		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2141 		return (0);
2142 	}
2143 
2144 	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
2145 		(void) record_hold_time(msp);
2146 
2147 	if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) &&
2148 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
2149 		ASSERT(mp->mutex_type & LOCK_ROBUST);
2150 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2151 		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
2152 	}
2153 	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
2154 
2155 	if (mtype & LOCK_PRIO_INHERIT) {
2156 		no_preempt(self);
2157 		mp->mutex_owner = 0;
2158 		mp->mutex_ownerpid = 0;
2159 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2160 		mp->mutex_lockw = LOCKCLEAR;
2161 		error = ___lwp_mutex_unlock(mp);
2162 		preempt(self);
2163 	} else if (mtype & USYNC_PROCESS) {
2164 		mutex_unlock_process(mp, release_all);
2165 	} else {	/* USYNC_THREAD */
2166 		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
2167 			(void) __lwp_unpark(lwpid);
2168 			preempt(self);
2169 		}
2170 	}
2171 
2172 	if (mtype & LOCK_ROBUST)
2173 		forget_lock(mp);
2174 
2175 	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
2176 		_ceil_prio_waive();
2177 
2178 	return (error);
2179 }
2180 
2181 #pragma weak _private_mutex_unlock = __mutex_unlock
2182 #pragma weak mutex_unlock = __mutex_unlock
2183 #pragma weak _mutex_unlock = __mutex_unlock
2184 #pragma weak pthread_mutex_unlock = __mutex_unlock
2185 #pragma weak _pthread_mutex_unlock = __mutex_unlock
2186 int
2187 __mutex_unlock(mutex_t *mp)
2188 {
2189 	ulwp_t *self = curthread;
2190 	uberdata_t *udp = self->ul_uberdata;
2191 	uberflags_t *gflags;
2192 	lwpid_t lwpid;
2193 	int mtype;
2194 	short el;
2195 
2196 	/*
2197 	 * Optimize the case of USYNC_THREAD, including
2198 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
2199 	 * no error detection, no lock statistics,
2200 	 * and the process has only a single thread.
2201 	 * (Most likely a traditional single-threaded application.)
2202 	 */
2203 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
2204 	    udp->uberflags.uf_all) == 0) {
2205 		if (mtype) {
2206 			/*
2207 			 * At this point we know that one or both of the
2208 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
2209 			 */
2210 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
2211 				return (EPERM);
2212 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2213 				mp->mutex_rcount--;
2214 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2215 				return (0);
2216 			}
2217 		}
2218 		/*
2219 		 * Only one thread exists so we don't need an atomic operation.
2220 		 * Also, there can be no waiters.
2221 		 */
2222 		mp->mutex_owner = 0;
2223 		mp->mutex_lockword = 0;
2224 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2225 		return (0);
2226 	}
2227 
2228 	/*
2229 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
2230 	 * no error detection, and no lock statistics.
2231 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
2232 	 */
2233 	if ((gflags = self->ul_schedctl_called) != NULL) {
2234 		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
2235 fast_unlock:
2236 			if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
2237 				(void) __lwp_unpark(lwpid);
2238 				preempt(self);
2239 			}
2240 			return (0);
2241 		}
2242 		if (el)		/* error detection or lock statistics */
2243 			goto slow_unlock;
2244 		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
2245 			/*
2246 			 * At this point we know that one or both of the
2247 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
2248 			 */
2249 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
2250 				return (EPERM);
2251 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2252 				mp->mutex_rcount--;
2253 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2254 				return (0);
2255 			}
2256 			goto fast_unlock;
2257 		}
2258 		if ((mtype &
2259 		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
2260 			/*
2261 			 * At this point we know that zero, one, or both of the
2262 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
2263 			 * that the USYNC_PROCESS flag is set.
2264 			 */
2265 			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
2266 				return (EPERM);
2267 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2268 				mp->mutex_rcount--;
2269 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2270 				return (0);
2271 			}
2272 			mutex_unlock_process(mp, 0);
2273 			return (0);
2274 		}
2275 	}
2276 
2277 	/* else do it the long way */
2278 slow_unlock:
2279 	return (mutex_unlock_internal(mp, 0));
2280 }
2281 
2282 /*
2283  * Internally to the library, almost all mutex lock/unlock actions
2284  * go through these lmutex_ functions, to protect critical regions.
2285  * We replicate a bit of code from __mutex_lock() and __mutex_unlock()
2286  * to make these functions faster since we know that the mutex type
2287  * of all internal locks is USYNC_THREAD.  We also know that internal
2288  * locking can never fail, so we panic if it does.
2289  */
2290 void
2291 lmutex_lock(mutex_t *mp)
2292 {
2293 	ulwp_t *self = curthread;
2294 	uberdata_t *udp = self->ul_uberdata;
2295 
2296 	ASSERT(mp->mutex_type == USYNC_THREAD);
2297 
2298 	enter_critical(self);
2299 	/*
2300 	 * Optimize the case of no lock statistics and only a single thread.
2301 	 * (Most likely a traditional single-threaded application.)
2302 	 */
2303 	if (udp->uberflags.uf_all == 0) {
2304 		/*
2305 		 * Only one thread exists; the mutex must be free.
2306 		 */
2307 		ASSERT(mp->mutex_lockw == 0);
2308 		mp->mutex_lockw = LOCKSET;
2309 		mp->mutex_owner = (uintptr_t)self;
2310 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2311 	} else {
2312 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
2313 
2314 		if (!self->ul_schedctl_called)
2315 			(void) setup_schedctl();
2316 
2317 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
2318 			mp->mutex_owner = (uintptr_t)self;
2319 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2320 		} else if (mutex_trylock_adaptive(mp, 1) != 0) {
2321 			(void) mutex_lock_queue(self, msp, mp, NULL);
2322 		}
2323 
2324 		if (msp)
2325 			record_begin_hold(msp);
2326 	}
2327 }
2328 
2329 void
2330 lmutex_unlock(mutex_t *mp)
2331 {
2332 	ulwp_t *self = curthread;
2333 	uberdata_t *udp = self->ul_uberdata;
2334 
2335 	ASSERT(mp->mutex_type == USYNC_THREAD);
2336 
2337 	/*
2338 	 * Optimize the case of no lock statistics and only a single thread.
2339 	 * (Most likely a traditional single-threaded application.)
2340 	 */
2341 	if (udp->uberflags.uf_all == 0) {
2342 		/*
2343 		 * Only one thread exists so there can be no waiters.
2344 		 */
2345 		mp->mutex_owner = 0;
2346 		mp->mutex_lockword = 0;
2347 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2348 	} else {
2349 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
2350 		lwpid_t lwpid;
2351 
2352 		if (msp)
2353 			(void) record_hold_time(msp);
2354 		if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
2355 			(void) __lwp_unpark(lwpid);
2356 			preempt(self);
2357 		}
2358 	}
2359 	exit_critical(self);
2360 }
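
/*
 * Illustrative sketch (editor's addition): the pattern used by libc
 * internals to protect a short critical region with the lmutex_
 * functions above.  The lock must be a USYNC_THREAD mutex (enforced
 * by the ASSERTs); the list being manipulated here is hypothetical.
 *
 *	typedef struct elem {
 *		struct elem *next;
 *	} elem_t;
 *
 *	static elem_t *list_head;
 *	static mutex_t list_lock = DEFAULTMUTEX;
 *
 *	void
 *	list_insert(elem_t *ep)
 *	{
 *		lmutex_lock(&list_lock);
 *		ep->next = list_head;	// other threads are excluded here
 *		list_head = ep;
 *		lmutex_unlock(&list_lock);
 *	}
 */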
2361 
2362 /*
2363  * For specialized code in libc, like the asynchronous i/o code,
2364  * the following sig_*() locking primitives are used in order
2365  * to make the code asynchronous signal safe.  Signals are
2366  * deferred while locks acquired by these functions are held.
2367  */
2368 void
2369 sig_mutex_lock(mutex_t *mp)
2370 {
2371 	sigoff(curthread);
2372 	(void) _private_mutex_lock(mp);
2373 }
2374 
2375 void
2376 sig_mutex_unlock(mutex_t *mp)
2377 {
2378 	(void) _private_mutex_unlock(mp);
2379 	sigon(curthread);
2380 }
2381 
2382 int
2383 sig_mutex_trylock(mutex_t *mp)
2384 {
2385 	int error;
2386 
2387 	sigoff(curthread);
2388 	if ((error = _private_mutex_trylock(mp)) != 0)
2389 		sigon(curthread);
2390 	return (error);
2391 }
2392 
2393 /*
2394  * sig_cond_wait() is a cancellation point.
2395  */
2396 int
2397 sig_cond_wait(cond_t *cv, mutex_t *mp)
2398 {
2399 	int error;
2400 
2401 	ASSERT(curthread->ul_sigdefer != 0);
2402 	_private_testcancel();
2403 	error = __cond_wait(cv, mp);
2404 	if (error == EINTR && curthread->ul_cursig) {
2405 		sig_mutex_unlock(mp);
2406 		/* take the deferred signal here */
2407 		sig_mutex_lock(mp);
2408 	}
2409 	_private_testcancel();
2410 	return (error);
2411 }
2412 
2413 /*
2414  * sig_cond_reltimedwait() is a cancellation point.
2415  */
2416 int
2417 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
2418 {
2419 	int error;
2420 
2421 	ASSERT(curthread->ul_sigdefer != 0);
2422 	_private_testcancel();
2423 	error = __cond_reltimedwait(cv, mp, ts);
2424 	if (error == EINTR && curthread->ul_cursig) {
2425 		sig_mutex_unlock(mp);
2426 		/* take the deferred signal here */
2427 		sig_mutex_lock(mp);
2428 	}
2429 	_private_testcancel();
2430 	return (error);
2431 }
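
/*
 * Illustrative sketch (editor's addition): how async-signal-safe code
 * (such as the asynchronous i/o code mentioned above) strings these
 * primitives together.  Signals stay deferred from sig_mutex_lock()
 * until the matching sig_mutex_unlock(), and sig_cond_wait() must be
 * called with signals already deferred, per the ASSERT above.  The
 * work_ready predicate is hypothetical.
 *
 *	static mutex_t work_lock = DEFAULTMUTEX;
 *	static cond_t work_cv = DEFAULTCV;
 *	static int work_ready;
 *
 *	void
 *	wait_for_work(void)
 *	{
 *		sig_mutex_lock(&work_lock);
 *		while (!work_ready)
 *			(void) sig_cond_wait(&work_cv, &work_lock);
 *		work_ready = 0;
 *		sig_mutex_unlock(&work_lock);
 *	}
 */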
2432 
2433 /*
2434  * For specialized code in libc, like the stdio code,
2435  * the following cancel_safe_*() locking primitives are used in
2436  * order to make the code cancellation-safe.  Cancellation is
2437  * deferred while locks acquired by these functions are held.
2438  */
2439 void
2440 cancel_safe_mutex_lock(mutex_t *mp)
2441 {
2442 	(void) _private_mutex_lock(mp);
2443 	curthread->ul_libc_locks++;
2444 }
2445 
2446 int
2447 cancel_safe_mutex_trylock(mutex_t *mp)
2448 {
2449 	int error;
2450 
2451 	if ((error = _private_mutex_trylock(mp)) == 0)
2452 		curthread->ul_libc_locks++;
2453 	return (error);
2454 }
2455 
2456 void
2457 cancel_safe_mutex_unlock(mutex_t *mp)
2458 {
2459 	ulwp_t *self = curthread;
2460 
2461 	ASSERT(self->ul_libc_locks != 0);
2462 
2463 	(void) _private_mutex_unlock(mp);
2464 
2465 	/*
2466 	 * Decrement the count of locks held by cancel_safe_mutex_lock().
2467 	 * If we are then in a position to terminate cleanly, there
2468 	 * is a pending cancellation, cancellation is not disabled,
2469 	 * and we received EINTR from a recent system call, then
2470 	 * perform the cancellation action now.
2471 	 */
2472 	if (--self->ul_libc_locks == 0 &&
2473 	    !(self->ul_vfork | self->ul_nocancel |
2474 	    self->ul_critical | self->ul_sigdefer) &&
2475 	    cancel_active())
2476 		_pthread_exit(PTHREAD_CANCELED);
2477 }
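
/*
 * Illustrative sketch (editor's addition): the stdio-style use of the
 * cancel_safe_ primitives above.  A cancellation that arrives while
 * the lock is held is deferred; cancel_safe_mutex_unlock() acts on it
 * only once no libc locks remain held.  The stream structure and
 * flush_buffer() helper are hypothetical.
 *
 *	struct my_stream {
 *		mutex_t	stream_lock;
 *		// ... buffered data ...
 *	};
 *
 *	extern void flush_buffer(struct my_stream *);
 *
 *	void
 *	stream_flush(struct my_stream *sp)
 *	{
 *		cancel_safe_mutex_lock(&sp->stream_lock);
 *		flush_buffer(sp);	// cancellation deferred here
 *		cancel_safe_mutex_unlock(&sp->stream_lock);
 *	}
 */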
2478 
2479 static int
2480 shared_mutex_held(mutex_t *mparg)
2481 {
2482 	/*
2483 	 * The 'volatile' is necessary to make sure the compiler doesn't
2484 	 * reorder the tests of the various components of the mutex.
2485 	 * They must be tested in this order:
2486 	 *	mutex_lockw
2487 	 *	mutex_owner
2488 	 *	mutex_ownerpid
2489 	 * This relies on the fact that everywhere mutex_lockw is cleared,
2490 	 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw
2491 	 * is cleared, and that everywhere mutex_lockw is set, mutex_owner
2492 	 * and mutex_ownerpid are set after mutex_lockw is set, and that
2493 	 * mutex_lockw is set or cleared with a memory barrier.
2494 	 */
2495 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
2496 	ulwp_t *self = curthread;
2497 	uberdata_t *udp = self->ul_uberdata;
2498 
2499 	return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid);
2500 }
2501 
2502 /*
2503  * Some crufty old programs define their own version of _mutex_held()
2504  * to be simply return(1).  This breaks internal libc logic, so we
2505  * define a private version for exclusive use by libc, mutex_is_held(),
2506  * and also a new public function, __mutex_held(), to be used in new
2507  * code to circumvent these crufty old programs.
2508  */
2509 #pragma weak mutex_held = mutex_is_held
2510 #pragma weak _mutex_held = mutex_is_held
2511 #pragma weak __mutex_held = mutex_is_held
2512 int
2513 mutex_is_held(mutex_t *mparg)
2514 {
2515 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
2516 
2517 	if (mparg->mutex_type & USYNC_PROCESS)
2518 		return (shared_mutex_held(mparg));
2519 	return (MUTEX_OWNED(mp, curthread));
2520 }
2521 
2522 #pragma weak _private_mutex_destroy = __mutex_destroy
2523 #pragma weak mutex_destroy = __mutex_destroy
2524 #pragma weak _mutex_destroy = __mutex_destroy
2525 #pragma weak pthread_mutex_destroy = __mutex_destroy
2526 #pragma weak _pthread_mutex_destroy = __mutex_destroy
2527 int
2528 __mutex_destroy(mutex_t *mp)
2529 {
2530 	if (mp->mutex_type & USYNC_PROCESS)
2531 		forget_lock(mp);
2532 	(void) _memset(mp, 0, sizeof (*mp));
2533 	tdb_sync_obj_deregister(mp);
2534 	return (0);
2535 }
2536 
2537 #pragma weak mutex_consistent = __mutex_consistent
2538 #pragma weak _mutex_consistent = __mutex_consistent
2539 #pragma weak pthread_mutex_consistent_np = __mutex_consistent
2540 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent
2541 int
2542 __mutex_consistent(mutex_t *mp)
2543 {
2544 	/*
2545 	 * Do this only for an inconsistent, initialized robust lock
2546 	 * that we hold.  For all other cases, return EINVAL.
2547 	 */
2548 	if (mutex_is_held(mp) &&
2549 	    (mp->mutex_type & LOCK_ROBUST) &&
2550 	    (mp->mutex_flag & LOCK_INITED) &&
2551 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
2552 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2553 		mp->mutex_rcount = 0;
2554 		return (0);
2555 	}
2556 	return (EINVAL);
2557 }
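
/*
 * Illustrative sketch (editor's addition): recovering a robust mutex
 * after its previous owner died, using the interfaces defined in this
 * file.  EOWNERDEAD means the lock was acquired but the data it
 * protects may be inconsistent; the new owner repairs the data and
 * marks the mutex consistent before proceeding.  repair_shared_state()
 * is a hypothetical application function.
 *
 *	#include <pthread.h>
 *	#include <errno.h>
 *
 *	extern void repair_shared_state(void);
 *
 *	int
 *	lock_shared(pthread_mutex_t *mp)
 *	{
 *		int error = pthread_mutex_lock(mp);
 *
 *		if (error == EOWNERDEAD) {
 *			repair_shared_state();
 *			(void) pthread_mutex_consistent_np(mp);
 *			error = 0;
 *		}
 *		return (error);	// ENOTRECOVERABLE, EINVAL, etc.
 *	}
 */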
2558 
2559 /*
2560  * Spin locks are separate from ordinary mutexes,
2561  * but we use the same data structure for them.
2562  */
2563 
2564 #pragma weak pthread_spin_init = _pthread_spin_init
2565 int
2566 _pthread_spin_init(pthread_spinlock_t *lock, int pshared)
2567 {
2568 	mutex_t *mp = (mutex_t *)lock;
2569 
2570 	(void) _memset(mp, 0, sizeof (*mp));
2571 	if (pshared == PTHREAD_PROCESS_SHARED)
2572 		mp->mutex_type = USYNC_PROCESS;
2573 	else
2574 		mp->mutex_type = USYNC_THREAD;
2575 	mp->mutex_flag = LOCK_INITED;
2576 	mp->mutex_magic = MUTEX_MAGIC;
2577 	return (0);
2578 }
2579 
2580 #pragma weak pthread_spin_destroy = _pthread_spin_destroy
2581 int
2582 _pthread_spin_destroy(pthread_spinlock_t *lock)
2583 {
2584 	(void) _memset(lock, 0, sizeof (*lock));
2585 	return (0);
2586 }
2587 
2588 #pragma weak pthread_spin_trylock = _pthread_spin_trylock
2589 int
2590 _pthread_spin_trylock(pthread_spinlock_t *lock)
2591 {
2592 	mutex_t *mp = (mutex_t *)lock;
2593 	ulwp_t *self = curthread;
2594 	int error = 0;
2595 
2596 	no_preempt(self);
2597 	if (set_lock_byte(&mp->mutex_lockw) != 0)
2598 		error = EBUSY;
2599 	else {
2600 		mp->mutex_owner = (uintptr_t)self;
2601 		if (mp->mutex_type == USYNC_PROCESS)
2602 			mp->mutex_ownerpid = self->ul_uberdata->pid;
2603 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2604 	}
2605 	preempt(self);
2606 	return (error);
2607 }
2608 
2609 #pragma weak pthread_spin_lock = _pthread_spin_lock
2610 int
2611 _pthread_spin_lock(pthread_spinlock_t *lock)
2612 {
2613 	mutex_t *mp = (mutex_t *)lock;
2614 	ulwp_t *self = curthread;
2615 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
2616 	int count = 0;
2617 
2618 	ASSERT(!self->ul_critical || self->ul_bindflags);
2619 
2620 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
2621 
2622 	/*
2623 	 * We don't care whether the owner is running on a processor.
2624 	 * We just spin because that's what this interface requires.
2625 	 */
2626 	for (;;) {
2627 		if (*lockp == 0) {	/* lock byte appears to be clear */
2628 			no_preempt(self);
2629 			if (set_lock_byte(lockp) == 0)
2630 				break;
2631 			preempt(self);
2632 		}
2633 		if (count < INT_MAX)
2634 			count++;
2635 		SMT_PAUSE();
2636 	}
2637 	mp->mutex_owner = (uintptr_t)self;
2638 	if (mp->mutex_type == USYNC_PROCESS)
2639 		mp->mutex_ownerpid = self->ul_uberdata->pid;
2640 	preempt(self);
2641 	if (count) {
2642 		DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
2643 	}
2644 	DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
2645 	return (0);
2646 }
2647 
2648 #pragma weak pthread_spin_unlock = _pthread_spin_unlock
2649 int
2650 _pthread_spin_unlock(pthread_spinlock_t *lock)
2651 {
2652 	mutex_t *mp = (mutex_t *)lock;
2653 	ulwp_t *self = curthread;
2654 
2655 	no_preempt(self);
2656 	mp->mutex_owner = 0;
2657 	mp->mutex_ownerpid = 0;
2658 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2659 	(void) atomic_swap_32(&mp->mutex_lockword, 0);
2660 	preempt(self);
2661 	return (0);
2662 }
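
/*
 * Illustrative sketch (editor's addition): typical use of the spin
 * lock interfaces above for a very short critical section.  The
 * counter is hypothetical; PTHREAD_PROCESS_PRIVATE selects the
 * USYNC_THREAD type set up by _pthread_spin_init() above.
 *
 *	#include <pthread.h>
 *
 *	static pthread_spinlock_t slock;
 *	static unsigned long hits;
 *
 *	void
 *	counter_init(void)
 *	{
 *		(void) pthread_spin_init(&slock, PTHREAD_PROCESS_PRIVATE);
 *	}
 *
 *	void
 *	counter_bump(void)
 *	{
 *		(void) pthread_spin_lock(&slock);
 *		hits++;			// keep the held section tiny
 *		(void) pthread_spin_unlock(&slock);
 *	}
 */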
2663 
2664 #define	INITIAL_LOCKS	8	/* initial size of ul_heldlocks.array */
2665 
2666 /*
2667  * Find/allocate an entry for 'lock' in our array of held locks.
2668  */
2669 static mutex_t **
2670 find_lock_entry(mutex_t *lock)
2671 {
2672 	ulwp_t *self = curthread;
2673 	mutex_t **remembered = NULL;
2674 	mutex_t **lockptr;
2675 	uint_t nlocks;
2676 
2677 	if ((nlocks = self->ul_heldlockcnt) != 0)
2678 		lockptr = self->ul_heldlocks.array;
2679 	else {
2680 		nlocks = 1;
2681 		lockptr = &self->ul_heldlocks.single;
2682 	}
2683 
2684 	for (; nlocks; nlocks--, lockptr++) {
2685 		if (*lockptr == lock)
2686 			return (lockptr);
2687 		if (*lockptr == NULL && remembered == NULL)
2688 			remembered = lockptr;
2689 	}
2690 	if (remembered != NULL) {
2691 		*remembered = lock;
2692 		return (remembered);
2693 	}
2694 
2695 	/*
2696 	 * No entry available.  Allocate more space, converting
2697 	 * the single entry into an array of entries if necessary.
2698 	 */
2699 	if ((nlocks = self->ul_heldlockcnt) == 0) {
2700 		/*
2701 		 * Initial allocation of the array.
2702 		 * Convert the single entry into an array.
2703 		 */
2704 		self->ul_heldlockcnt = nlocks = INITIAL_LOCKS;
2705 		lockptr = lmalloc(nlocks * sizeof (mutex_t *));
2706 		/*
2707 		 * The single entry becomes the first entry in the array.
2708 		 */
2709 		*lockptr = self->ul_heldlocks.single;
2710 		self->ul_heldlocks.array = lockptr;
2711 		/*
2712 		 * Return the next available entry in the array.
2713 		 */
2714 		*++lockptr = lock;
2715 		return (lockptr);
2716 	}
2717 	/*
2718 	 * Reallocate the array, double the size each time.
2719 	 */
2720 	lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *));
2721 	(void) _memcpy(lockptr, self->ul_heldlocks.array,
2722 	    nlocks * sizeof (mutex_t *));
2723 	lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2724 	self->ul_heldlocks.array = lockptr;
2725 	self->ul_heldlockcnt *= 2;
2726 	/*
2727 	 * Return the next available entry in the newly allocated array.
2728 	 */
2729 	*(lockptr += nlocks) = lock;
2730 	return (lockptr);
2731 }
2732 
2733 /*
2734  * Insert 'lock' into our list of held locks.
2735  * Currently only used for LOCK_ROBUST mutexes.
2736  */
2737 void
2738 remember_lock(mutex_t *lock)
2739 {
2740 	(void) find_lock_entry(lock);
2741 }
2742 
2743 /*
2744  * Remove 'lock' from our list of held locks.
2745  * Currently only used for LOCK_ROBUST mutexes.
2746  */
2747 void
2748 forget_lock(mutex_t *lock)
2749 {
2750 	*find_lock_entry(lock) = NULL;
2751 }
2752 
2753 /*
2754  * Free the array of held locks.
2755  */
2756 void
2757 heldlock_free(ulwp_t *ulwp)
2758 {
2759 	uint_t nlocks;
2760 
2761 	if ((nlocks = ulwp->ul_heldlockcnt) != 0)
2762 		lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2763 	ulwp->ul_heldlockcnt = 0;
2764 	ulwp->ul_heldlocks.array = NULL;
2765 }
2766 
2767 /*
2768  * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD.
2769  * Called from _thrp_exit() to deal with abandoned locks.
2770  */
2771 void
2772 heldlock_exit(void)
2773 {
2774 	ulwp_t *self = curthread;
2775 	mutex_t **lockptr;
2776 	uint_t nlocks;
2777 	mutex_t *mp;
2778 
2779 	if ((nlocks = self->ul_heldlockcnt) != 0)
2780 		lockptr = self->ul_heldlocks.array;
2781 	else {
2782 		nlocks = 1;
2783 		lockptr = &self->ul_heldlocks.single;
2784 	}
2785 
2786 	for (; nlocks; nlocks--, lockptr++) {
2787 		/*
2788 		 * The kernel takes care of transitioning held
2789 		 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD.
2790 		 * We avoid that case here.
2791 		 */
2792 		if ((mp = *lockptr) != NULL &&
2793 		    mutex_is_held(mp) &&
2794 		    (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) ==
2795 		    LOCK_ROBUST) {
2796 			mp->mutex_rcount = 0;
2797 			if (!(mp->mutex_flag & LOCK_UNMAPPED))
2798 				mp->mutex_flag |= LOCK_OWNERDEAD;
2799 			(void) mutex_unlock_internal(mp, 1);
2800 		}
2801 	}
2802 
2803 	heldlock_free(self);
2804 }
2805 
2806 #pragma weak cond_init = _cond_init
2807 /* ARGSUSED2 */
2808 int
2809 _cond_init(cond_t *cvp, int type, void *arg)
2810 {
2811 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2812 		return (EINVAL);
2813 	(void) _memset(cvp, 0, sizeof (*cvp));
2814 	cvp->cond_type = (uint16_t)type;
2815 	cvp->cond_magic = COND_MAGIC;
2816 	return (0);
2817 }
2818 
2819 /*
2820  * cond_sleep_queue(): utility function for cond_wait_queue().
2821  *
2822  * Go to sleep on a condvar sleep queue, expect to be waked up
2823  * by someone calling cond_signal() or cond_broadcast() or due
2824  * to receiving a UNIX signal or being cancelled, or just simply
2825  * due to a spurious wakeup (like someome calling forkall()).
2826  *
2827  * The associated mutex is *not* reacquired before returning.
2828  * That must be done by the caller of cond_sleep_queue().
2829  */
2830 static int
2831 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2832 {
2833 	ulwp_t *self = curthread;
2834 	queue_head_t *qp;
2835 	queue_head_t *mqp;
2836 	lwpid_t lwpid;
2837 	int signalled;
2838 	int error;
2839 	int release_all;
2840 
2841 	/*
2842 	 * Put ourself on the CV sleep queue, unlock the mutex, then
2843 	 * park ourself and unpark a candidate lwp to grab the mutex.
2844 	 * We must go onto the CV sleep queue before dropping the
2845 	 * mutex in order to guarantee atomicity of the operation.
2846 	 */
2847 	self->ul_sp = stkptr();
2848 	qp = queue_lock(cvp, CV);
2849 	enqueue(qp, self, cvp, CV);
2850 	cvp->cond_waiters_user = 1;
2851 	self->ul_cvmutex = mp;
2852 	self->ul_cv_wake = (tsp != NULL);
2853 	self->ul_signalled = 0;
2854 	if (mp->mutex_flag & LOCK_OWNERDEAD) {
2855 		mp->mutex_flag &= ~LOCK_OWNERDEAD;
2856 		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
2857 	}
2858 	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
2859 	lwpid = mutex_unlock_queue(mp, release_all);
2860 	for (;;) {
2861 		set_parking_flag(self, 1);
2862 		queue_unlock(qp);
2863 		if (lwpid != 0) {
2864 			lwpid = preempt_unpark(self, lwpid);
2865 			preempt(self);
2866 		}
2867 		/*
2868 		 * We may have a deferred signal present,
2869 		 * in which case we should return EINTR.
2870 		 * Also, we may have received a SIGCANCEL; if so
2871 		 * and we are cancelable we should return EINTR.
2872 		 * We force an immediate EINTR return from
2873 		 * __lwp_park() by turning our parking flag off.
2874 		 */
2875 		if (self->ul_cursig != 0 ||
2876 		    (self->ul_cancelable && self->ul_cancel_pending))
2877 			set_parking_flag(self, 0);
2878 		/*
2879 		 * __lwp_park() will return the residual time in tsp
2880 		 * if we are unparked before the timeout expires.
2881 		 */
2882 		error = __lwp_park(tsp, lwpid);
2883 		set_parking_flag(self, 0);
2884 		lwpid = 0;	/* unpark the other lwp only once */
2885 		/*
2886 		 * We were waked up by cond_signal(), cond_broadcast(),
2887 		 * by an interrupt or timeout (EINTR or ETIME),
2888 		 * or we may just have gotten a spurious wakeup.
2889 		 */
2890 		qp = queue_lock(cvp, CV);
2891 		mqp = queue_lock(mp, MX);
2892 		if (self->ul_sleepq == NULL)
2893 			break;
2894 		/*
2895 		 * We are on either the condvar sleep queue or the
2896 		 * mutex sleep queue.  Break out of the sleep if we
2897 		 * were interrupted or we timed out (EINTR or ETIME).
2898 		 * Else this is a spurious wakeup; continue the loop.
2899 		 */
2900 		if (self->ul_sleepq == mqp) {		/* mutex queue */
2901 			if (error) {
2902 				mp->mutex_waiters = dequeue_self(mqp, mp);
2903 				break;
2904 			}
2905 			tsp = NULL;	/* no more timeout */
2906 		} else if (self->ul_sleepq == qp) {	/* condvar queue */
2907 			if (error) {
2908 				cvp->cond_waiters_user = dequeue_self(qp, cvp);
2909 				break;
2910 			}
2911 			/*
2912 			 * Else a spurious wakeup on the condvar queue.
2913 			 * __lwp_park() has already adjusted the timeout.
2914 			 */
2915 		} else {
2916 			thr_panic("cond_sleep_queue(): thread not on queue");
2917 		}
2918 		queue_unlock(mqp);
2919 	}
2920 
2921 	self->ul_sp = 0;
2922 	ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0);
2923 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
2924 	    self->ul_wchan == NULL);
2925 
2926 	signalled = self->ul_signalled;
2927 	self->ul_signalled = 0;
2928 	queue_unlock(qp);
2929 	queue_unlock(mqp);
2930 
2931 	/*
2932 	 * If we were concurrently cond_signal()d and any of:
2933 	 * received a UNIX signal, were cancelled, or got a timeout,
2934 	 * then perform another cond_signal() to avoid consuming it.
2935 	 */
2936 	if (error && signalled)
2937 		(void) cond_signal_internal(cvp);
2938 
2939 	return (error);
2940 }
2941 
2942 int
2943 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2944 {
2945 	ulwp_t *self = curthread;
2946 	int error;
2947 	int merror;
2948 
2949 	/*
2950 	 * The old thread library was programmed to defer signals
2951 	 * while in cond_wait() so that the associated mutex would
2952 	 * be guaranteed to be held when the application signal
2953 	 * handler was invoked.
2954 	 *
2955 	 * We do not behave this way by default; the state of the
2956 	 * associated mutex in the signal handler is undefined.
2957 	 *
2958 	 * To accommodate applications that depend on the old
2959 	 * behavior, the _THREAD_COND_WAIT_DEFER environment
2960 	 * variable can be set to 1 and we will behave in the
2961 	 * old way with respect to cond_wait().
2962 	 */
2963 	if (self->ul_cond_wait_defer)
2964 		sigoff(self);
2965 
2966 	error = cond_sleep_queue(cvp, mp, tsp);
2967 
2968 	/*
2969 	 * Reacquire the mutex.
2970 	 */
2971 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
2972 		error = merror;
2973 
2974 	/*
2975 	 * Take any deferred signal now, after we have reacquired the mutex.
2976 	 */
2977 	if (self->ul_cond_wait_defer)
2978 		sigon(self);
2979 
2980 	return (error);
2981 }
2982 
2983 /*
2984  * cond_sleep_kernel(): utility function for cond_wait_kernel().
2985  * See the comment ahead of cond_sleep_queue(), above.
2986  */
2987 static int
2988 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2989 {
2990 	int mtype = mp->mutex_type;
2991 	ulwp_t *self = curthread;
2992 	int error;
2993 
2994 	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
2995 		_ceil_prio_waive();
2996 
2997 	self->ul_sp = stkptr();
2998 	self->ul_wchan = cvp;
2999 	mp->mutex_owner = 0;
3000 	mp->mutex_ownerpid = 0;
3001 	if (mtype & LOCK_PRIO_INHERIT)
3002 		mp->mutex_lockw = LOCKCLEAR;
3003 	/*
3004 	 * ___lwp_cond_wait() returns immediately with EINTR if
3005 	 * set_parking_flag(self,0) is called on this lwp before it
3006 	 * goes to sleep in the kernel.  sigacthandler() calls this
3007 	 * when a deferred signal is noted.  This assures that we don't
3008 	 * get stuck in ___lwp_cond_wait() with all signals blocked
3009 	 * due to taking a deferred signal before going to sleep.
3010 	 */
3011 	set_parking_flag(self, 1);
3012 	if (self->ul_cursig != 0 ||
3013 	    (self->ul_cancelable && self->ul_cancel_pending))
3014 		set_parking_flag(self, 0);
3015 	error = ___lwp_cond_wait(cvp, mp, tsp, 1);
3016 	set_parking_flag(self, 0);
3017 	self->ul_sp = 0;
3018 	self->ul_wchan = NULL;
3019 	return (error);
3020 }
3021 
3022 int
3023 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3024 {
3025 	ulwp_t *self = curthread;
3026 	int error;
3027 	int merror;
3028 
3029 	/*
3030 	 * See the large comment in cond_wait_queue(), above.
3031 	 */
3032 	if (self->ul_cond_wait_defer)
3033 		sigoff(self);
3034 
3035 	error = cond_sleep_kernel(cvp, mp, tsp);
3036 
3037 	/*
3038 	 * Override the return code from ___lwp_cond_wait()
3039 	 * with any non-zero return code from mutex_lock().
3040 	 * This addresses robust lock failures in particular;
3041 	 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE
3042 	 * errors in order to take corrective action.
3043 	 */
3044 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
3045 		error = merror;
3046 
3047 	/*
3048 	 * Take any deferred signal now, after we have reacquired the mutex.
3049 	 */
3050 	if (self->ul_cond_wait_defer)
3051 		sigon(self);
3052 
3053 	return (error);
3054 }
3055 
3056 /*
3057  * Common code for _cond_wait() and _cond_timedwait()
3058  */
3059 int
3060 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3061 {
3062 	int mtype = mp->mutex_type;
3063 	hrtime_t begin_sleep = 0;
3064 	ulwp_t *self = curthread;
3065 	uberdata_t *udp = self->ul_uberdata;
3066 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3067 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
3068 	uint8_t rcount;
3069 	int error = 0;
3070 
3071 	/*
3072 	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
3073 	 *	Except in the case of [ETIMEDOUT], all these error checks
3074 	 *	shall act as if they were performed immediately at the
3075 	 *	beginning of processing for the function and shall cause
3076 	 *	an error return, in effect, prior to modifying the state
3077 	 *	of the mutex specified by mutex or the condition variable
3078 	 *	specified by cond.
3079 	 * Therefore, we must return EINVAL now if the timout is invalid.
3080 	 * Therefore, we must return EINVAL now if the timeout is invalid.
3081 	if (tsp != NULL &&
3082 	    (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC))
3083 		return (EINVAL);
3084 
3085 	if (__td_event_report(self, TD_SLEEP, udp)) {
3086 		self->ul_sp = stkptr();
3087 		self->ul_wchan = cvp;
3088 		self->ul_td_evbuf.eventnum = TD_SLEEP;
3089 		self->ul_td_evbuf.eventdata = cvp;
3090 		tdb_event(TD_SLEEP, udp);
3091 		self->ul_sp = 0;
3092 	}
3093 	if (csp) {
3094 		if (tsp)
3095 			tdb_incr(csp->cond_timedwait);
3096 		else
3097 			tdb_incr(csp->cond_wait);
3098 	}
3099 	if (msp)
3100 		begin_sleep = record_hold_time(msp);
3101 	else if (csp)
3102 		begin_sleep = gethrtime();
3103 
3104 	if (self->ul_error_detection) {
3105 		if (!mutex_is_held(mp))
3106 			lock_error(mp, "cond_wait", cvp, NULL);
3107 		if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0)
3108 			lock_error(mp, "recursive mutex in cond_wait",
3109 			    cvp, NULL);
3110 		if (cvp->cond_type & USYNC_PROCESS) {
3111 			if (!(mtype & USYNC_PROCESS))
3112 				lock_error(mp, "cond_wait", cvp,
3113 				    "condvar process-shared, "
3114 				    "mutex process-private");
3115 		} else {
3116 			if (mtype & USYNC_PROCESS)
3117 				lock_error(mp, "cond_wait", cvp,
3118 				    "condvar process-private, "
3119 				    "mutex process-shared");
3120 		}
3121 	}
3122 
3123 	/*
3124 	 * We deal with recursive mutexes by completely
3125 	 * dropping the lock and restoring the recursion
3126 	 * count after waking up.  This is arguably wrong,
3127 	 * but it obeys the principle of least astonishment.
3128 	 */
3129 	rcount = mp->mutex_rcount;
3130 	mp->mutex_rcount = 0;
3131 	if ((mtype &
3132 	    (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) |
3133 	    (cvp->cond_type & USYNC_PROCESS))
3134 		error = cond_wait_kernel(cvp, mp, tsp);
3135 	else
3136 		error = cond_wait_queue(cvp, mp, tsp);
3137 	mp->mutex_rcount = rcount;
3138 
3139 	if (csp) {
3140 		hrtime_t lapse = gethrtime() - begin_sleep;
3141 		if (tsp == NULL)
3142 			csp->cond_wait_sleep_time += lapse;
3143 		else {
3144 			csp->cond_timedwait_sleep_time += lapse;
3145 			if (error == ETIME)
3146 				tdb_incr(csp->cond_timedwait_timeout);
3147 		}
3148 	}
3149 	return (error);
3150 }
3151 
3152 /*
3153  * cond_wait() and _cond_wait() are cancellation points but __cond_wait()
3154  * is not.  Internally, libc calls the non-cancellation version.
3155  * Other libraries need to use pthread_setcancelstate(), as appropriate,
3156  * since __cond_wait() is not exported from libc.
3157  */
3158 int
3159 __cond_wait(cond_t *cvp, mutex_t *mp)
3160 {
3161 	ulwp_t *self = curthread;
3162 	uberdata_t *udp = self->ul_uberdata;
3163 	uberflags_t *gflags;
3164 
3165 	/*
3166 	 * Optimize the common case of USYNC_THREAD plus
3167 	 * no error detection, no lock statistics, and no event tracing.
3168 	 */
3169 	if ((gflags = self->ul_schedctl_called) != NULL &&
3170 	    (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted |
3171 	    self->ul_td_events_enable |
3172 	    udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0)
3173 		return (cond_wait_queue(cvp, mp, NULL));
3174 
3175 	/*
3176 	 * Else do it the long way.
3177 	 */
3178 	return (cond_wait_common(cvp, mp, NULL));
3179 }
3180 
3181 #pragma weak cond_wait = _cond_wait
3182 int
3183 _cond_wait(cond_t *cvp, mutex_t *mp)
3184 {
3185 	int error;
3186 
3187 	_cancelon();
3188 	error = __cond_wait(cvp, mp);
3189 	if (error == EINTR)
3190 		_canceloff();
3191 	else
3192 		_canceloff_nocancel();
3193 	return (error);
3194 }
3195 
3196 /*
3197  * pthread_cond_wait() is a cancellation point.
3198  */
3199 #pragma weak pthread_cond_wait = _pthread_cond_wait
3200 int
3201 _pthread_cond_wait(cond_t *cvp, mutex_t *mp)
3202 {
3203 	int error;
3204 
3205 	error = _cond_wait(cvp, mp);
3206 	return ((error == EINTR)? 0 : error);
3207 }
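
/*
 * Illustrative sketch (editor's addition): the canonical caller-side
 * pattern for the wait interfaces above.  Because cond_sleep_queue()
 * can return on a spurious wakeup, and because _pthread_cond_wait()
 * maps EINTR to zero, callers must re-test their predicate in a loop.
 * The queue_nonempty predicate is hypothetical.
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;
 *	static int queue_nonempty;
 *
 *	void
 *	consumer_wait(void)
 *	{
 *		(void) pthread_mutex_lock(&qlock);
 *		while (!queue_nonempty)
 *			(void) pthread_cond_wait(&qcv, &qlock);
 *		// ... remove an item; may clear queue_nonempty ...
 *		(void) pthread_mutex_unlock(&qlock);
 *	}
 */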
3208 
3209 /*
3210  * cond_timedwait() and _cond_timedwait() are cancellation points
3211  * but __cond_timedwait() is not.
3212  */
3213 int
3214 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3215 {
3216 	clockid_t clock_id = cvp->cond_clockid;
3217 	timespec_t reltime;
3218 	int error;
3219 
3220 	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES)
3221 		clock_id = CLOCK_REALTIME;
3222 	abstime_to_reltime(clock_id, abstime, &reltime);
3223 	error = cond_wait_common(cvp, mp, &reltime);
3224 	if (error == ETIME && clock_id == CLOCK_HIGHRES) {
3225 		/*
3226 		 * Don't return ETIME if we didn't really get a timeout.
3227 		 * This can happen if we return because someone resets
3228 		 * the system clock.  Just return zero in this case,
3229 		 * giving a spurious wakeup but not a timeout.
3230 		 */
3231 		if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC +
3232 		    abstime->tv_nsec > gethrtime())
3233 			error = 0;
3234 	}
3235 	return (error);
3236 }
3237 
3238 #pragma weak cond_timedwait = _cond_timedwait
3239 int
3240 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3241 {
3242 	int error;
3243 
3244 	_cancelon();
3245 	error = __cond_timedwait(cvp, mp, abstime);
3246 	if (error == EINTR)
3247 		_canceloff();
3248 	else
3249 		_canceloff_nocancel();
3250 	return (error);
3251 }
3252 
3253 /*
3254  * pthread_cond_timedwait() is a cancellation point.
3255  */
3256 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait
3257 int
3258 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3259 {
3260 	int error;
3261 
3262 	error = _cond_timedwait(cvp, mp, abstime);
3263 	if (error == ETIME)
3264 		error = ETIMEDOUT;
3265 	else if (error == EINTR)
3266 		error = 0;
3267 	return (error);
3268 }
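
/*
 * Illustrative sketch (editor's addition): a bounded wait using the
 * absolute-time interface above.  The deadline is computed once, so
 * the total wait stays bounded even across spurious wakeups.  The
 * ten-second bound and the done flag are hypothetical.
 *
 *	#include <pthread.h>
 *	#include <time.h>
 *
 *	int
 *	wait_until_done(pthread_mutex_t *mp, pthread_cond_t *cvp, int *done)
 *	{
 *		timespec_t abstime;
 *		int error = 0;
 *
 *		(void) clock_gettime(CLOCK_REALTIME, &abstime);
 *		abstime.tv_sec += 10;
 *		(void) pthread_mutex_lock(mp);
 *		while (!*done && error == 0)
 *			error = pthread_cond_timedwait(cvp, mp, &abstime);
 *		(void) pthread_mutex_unlock(mp);
 *		return (error);		// ETIMEDOUT on timeout
 *	}
 */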
3269 
3270 /*
3271  * cond_reltimedwait() and _cond_reltimedwait() are cancellation points
3272  * but __cond_reltimedwait() is not.
3273  */
3274 int
3275 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3276 {
3277 	timespec_t tslocal = *reltime;
3278 
3279 	return (cond_wait_common(cvp, mp, &tslocal));
3280 }
3281 
3282 #pragma weak cond_reltimedwait = _cond_reltimedwait
3283 int
3284 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3285 {
3286 	int error;
3287 
3288 	_cancelon();
3289 	error = __cond_reltimedwait(cvp, mp, reltime);
3290 	if (error == EINTR)
3291 		_canceloff();
3292 	else
3293 		_canceloff_nocancel();
3294 	return (error);
3295 }
3296 
3297 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np
3298 int
3299 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp,
3300 	const timespec_t *reltime)
3301 {
3302 	int error;
3303 
3304 	error = _cond_reltimedwait(cvp, mp, reltime);
3305 	if (error == ETIME)
3306 		error = ETIMEDOUT;
3307 	else if (error == EINTR)
3308 		error = 0;
3309 	return (error);
3310 }
3311 
3312 #pragma weak pthread_cond_signal = cond_signal_internal
3313 #pragma weak _pthread_cond_signal = cond_signal_internal
3314 #pragma weak cond_signal = cond_signal_internal
3315 #pragma weak _cond_signal = cond_signal_internal
3316 int
3317 cond_signal_internal(cond_t *cvp)
3318 {
3319 	ulwp_t *self = curthread;
3320 	uberdata_t *udp = self->ul_uberdata;
3321 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3322 	int error = 0;
3323 	queue_head_t *qp;
3324 	mutex_t *mp;
3325 	queue_head_t *mqp;
3326 	ulwp_t **ulwpp;
3327 	ulwp_t *ulwp;
3328 	ulwp_t *prev = NULL;
3329 	ulwp_t *next;
3330 	ulwp_t **suspp = NULL;
3331 	ulwp_t *susprev;
3332 
3333 	if (csp)
3334 		tdb_incr(csp->cond_signal);
3335 
3336 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3337 		error = __lwp_cond_signal(cvp);
3338 
3339 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3340 		return (error);
3341 
3342 	/*
3343 	 * Move someone from the condvar sleep queue to the mutex sleep
3344 	 * queue for the mutex that he will acquire on being waked up.
3345 	 * We can do this only if we own the mutex he will acquire.
3346 	 * If we do not own the mutex, or if his ul_cv_wake flag
3347 	 * is set, just dequeue and unpark him.
3348 	 */
3349 	qp = queue_lock(cvp, CV);
3350 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
3351 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
3352 		if (ulwp->ul_wchan == cvp) {
3353 			if (!ulwp->ul_stop)
3354 				break;
3355 			/*
3356 			 * Try not to dequeue a suspended thread.
3357 			 * This mimics the old libthread's behavior.
3358 			 */
3359 			if (suspp == NULL) {
3360 				suspp = ulwpp;
3361 				susprev = prev;
3362 			}
3363 		}
3364 	}
3365 	if (ulwp == NULL && suspp != NULL) {
3366 		ulwp = *(ulwpp = suspp);
3367 		prev = susprev;
3368 		suspp = NULL;
3369 	}
3370 	if (ulwp == NULL) {	/* no one on the sleep queue */
3371 		cvp->cond_waiters_user = 0;
3372 		queue_unlock(qp);
3373 		return (error);
3374 	}
3375 	/*
3376 	 * Scan the remainder of the CV queue for another waiter.
3377 	 */
3378 	if (suspp != NULL) {
3379 		next = *suspp;
3380 	} else {
3381 		for (next = ulwp->ul_link; next != NULL; next = next->ul_link)
3382 			if (next->ul_wchan == cvp)
3383 				break;
3384 	}
3385 	if (next == NULL)
3386 		cvp->cond_waiters_user = 0;
3387 
3388 	/*
3389 	 * Inform the thread that he was the recipient of a cond_signal().
3390 	 * This lets him deal with cond_signal() and, concurrently,
3391 	 * one or more of a cancellation, a UNIX signal, or a timeout.
3392 	 * These latter conditions must not consume a cond_signal().
3393 	 */
3394 	ulwp->ul_signalled = 1;
3395 
3396 	/*
3397 	 * Dequeue the waiter but leave his ul_sleepq non-NULL
3398 	 * while we move him to the mutex queue so that he can
3399 	 * deal properly with spurious wakeups.
3400 	 */
3401 	*ulwpp = ulwp->ul_link;
3402 	ulwp->ul_link = NULL;
3403 	if (qp->qh_tail == ulwp)
3404 		qp->qh_tail = prev;
3405 	qp->qh_qlen--;
3406 
3407 	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
3408 	ulwp->ul_cvmutex = NULL;
3409 	ASSERT(mp != NULL);
3410 
3411 	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3412 		lwpid_t lwpid = ulwp->ul_lwpid;
3413 
3414 		no_preempt(self);
3415 		ulwp->ul_sleepq = NULL;
3416 		ulwp->ul_wchan = NULL;
3417 		ulwp->ul_cv_wake = 0;
3418 		queue_unlock(qp);
3419 		(void) __lwp_unpark(lwpid);
3420 		preempt(self);
3421 	} else {
3422 		mqp = queue_lock(mp, MX);
3423 		enqueue(mqp, ulwp, mp, MX);
3424 		mp->mutex_waiters = 1;
3425 		queue_unlock(mqp);
3426 		queue_unlock(qp);
3427 	}
3428 
3429 	return (error);
3430 }
3431 
3432 /*
3433  * Utility function called by mutex_wakeup_all(), cond_broadcast(),
3434  * and rw_queue_release() to (re)allocate a big buffer to hold the
3435  * lwpids of all the threads to be set running after they are removed
3436  * from their sleep queues.  Since we are holding a queue lock, we
3437  * cannot call any function that might acquire a lock.  mmap(), munmap(),
3438  * lwp_unpark_all() are simple system calls and are safe in this regard.
3439  */
3440 lwpid_t *
3441 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr)
3442 {
3443 	/*
3444 	 * Allocate NEWLWPS ids on the first overflow.
3445 	 * Double the allocation each time after that.
3446 	 */
3447 	int nlwpid = *nlwpid_ptr;
3448 	int maxlwps = *maxlwps_ptr;
3449 	int first_allocation;
3450 	int newlwps;
3451 	void *vaddr;
3452 
3453 	ASSERT(nlwpid == maxlwps);
3454 
3455 	first_allocation = (maxlwps == MAXLWPS);
3456 	newlwps = first_allocation? NEWLWPS : 2 * maxlwps;
3457 	vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t),
3458 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
3459 
3460 	if (vaddr == MAP_FAILED) {
3461 		/*
3462 		 * Let's hope this never happens.
3463 		 * If it does, then we have a terrible
3464 		 * thundering herd on our hands.
3465 		 */
3466 		(void) __lwp_unpark_all(lwpid, nlwpid);
3467 		*nlwpid_ptr = 0;
3468 	} else {
3469 		(void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t));
3470 		if (!first_allocation)
3471 			(void) _private_munmap(lwpid,
3472 			    maxlwps * sizeof (lwpid_t));
3473 		lwpid = vaddr;
3474 		*maxlwps_ptr = newlwps;
3475 	}
3476 
3477 	return (lwpid);
3478 }
3479 
3480 #pragma weak pthread_cond_broadcast = cond_broadcast_internal
3481 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal
3482 #pragma weak cond_broadcast = cond_broadcast_internal
3483 #pragma weak _cond_broadcast = cond_broadcast_internal
3484 int
3485 cond_broadcast_internal(cond_t *cvp)
3486 {
3487 	ulwp_t *self = curthread;
3488 	uberdata_t *udp = self->ul_uberdata;
3489 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3490 	int error = 0;
3491 	queue_head_t *qp;
3492 	mutex_t *mp;
3493 	mutex_t *mp_cache = NULL;
3494 	queue_head_t *mqp = NULL;
3495 	ulwp_t **ulwpp;
3496 	ulwp_t *ulwp;
3497 	ulwp_t *prev = NULL;
3498 	int nlwpid = 0;
3499 	int maxlwps = MAXLWPS;
3500 	lwpid_t buffer[MAXLWPS];
3501 	lwpid_t *lwpid = buffer;
3502 
3503 	if (csp)
3504 		tdb_incr(csp->cond_broadcast);
3505 
3506 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3507 		error = __lwp_cond_broadcast(cvp);
3508 
3509 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3510 		return (error);
3511 
3512 	/*
3513 	 * Move everyone from the condvar sleep queue to the mutex sleep
3514 	 * queue for the mutex that they will acquire on being waked up.
3515 	 * We can do this only if we own the mutex they will acquire.
3516 	 * If we do not own the mutex, or if their ul_cv_wake flag
3517 	 * is set, just dequeue and unpark them.
3518 	 *
3519 	 * We keep track of lwpids that are to be unparked in lwpid[].
3520 	 * __lwp_unpark_all() is called to unpark all of them after
3521 	 * they have been removed from the sleep queue and the sleep
3522 	 * queue lock has been dropped.  If we run out of space in our
3523 	 * on-stack buffer, we need to allocate more but we can't call
3524 	 * lmalloc() because we are holding a queue lock when the overflow
3525 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
3526 	 * either because the application may have allocated a small
3527 	 * stack and we don't want to overrun the stack.  So we call
3528 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
3529 	 * system call directly since that path acquires no locks.
3530 	 */
3531 	qp = queue_lock(cvp, CV);
3532 	cvp->cond_waiters_user = 0;
3533 	ulwpp = &qp->qh_head;
3534 	while ((ulwp = *ulwpp) != NULL) {
3535 		if (ulwp->ul_wchan != cvp) {
3536 			prev = ulwp;
3537 			ulwpp = &ulwp->ul_link;
3538 			continue;
3539 		}
3540 		*ulwpp = ulwp->ul_link;
3541 		ulwp->ul_link = NULL;
3542 		if (qp->qh_tail == ulwp)
3543 			qp->qh_tail = prev;
3544 		qp->qh_qlen--;
3545 		mp = ulwp->ul_cvmutex;		/* his mutex */
3546 		ulwp->ul_cvmutex = NULL;
3547 		ASSERT(mp != NULL);
3548 		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3549 			ulwp->ul_sleepq = NULL;
3550 			ulwp->ul_wchan = NULL;
3551 			ulwp->ul_cv_wake = 0;
3552 			if (nlwpid == maxlwps)
3553 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
3554 			lwpid[nlwpid++] = ulwp->ul_lwpid;
3555 		} else {
3556 			if (mp != mp_cache) {
3557 				mp_cache = mp;
3558 				if (mqp != NULL)
3559 					queue_unlock(mqp);
3560 				mqp = queue_lock(mp, MX);
3561 			}
3562 			enqueue(mqp, ulwp, mp, MX);
3563 			mp->mutex_waiters = 1;
3564 		}
3565 	}
3566 	if (mqp != NULL)
3567 		queue_unlock(mqp);
3568 	if (nlwpid == 0) {
3569 		queue_unlock(qp);
3570 	} else {
3571 		no_preempt(self);
3572 		queue_unlock(qp);
3573 		if (nlwpid == 1)
3574 			(void) __lwp_unpark(lwpid[0]);
3575 		else
3576 			(void) __lwp_unpark_all(lwpid, nlwpid);
3577 		preempt(self);
3578 	}
3579 	if (lwpid != buffer)
3580 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
3581 	return (error);
3582 }
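
/*
 * Illustrative sketch (editor's addition): choosing between the two
 * wakeup interfaces above.  cond_signal() wakes at most one waiter;
 * cond_broadcast() wakes them all, which is appropriate when a state
 * change may satisfy more than one waiter, as with the shutdown flag
 * here (the flag and the queue it describes are hypothetical).
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;
 *	static int shutting_down;
 *
 *	void
 *	queue_shutdown(void)
 *	{
 *		(void) pthread_mutex_lock(&qlock);
 *		shutting_down = 1;
 *		(void) pthread_mutex_unlock(&qlock);
 *		(void) pthread_cond_broadcast(&qcv);	// wake every waiter
 *	}
 */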
3583 
3584 #pragma weak pthread_cond_destroy = _cond_destroy
3585 #pragma weak _pthread_cond_destroy = _cond_destroy
3586 #pragma weak cond_destroy = _cond_destroy
3587 int
3588 _cond_destroy(cond_t *cvp)
3589 {
3590 	cvp->cond_magic = 0;
3591 	tdb_sync_obj_deregister(cvp);
3592 	return (0);
3593 }
3594 
3595 #if defined(THREAD_DEBUG)
3596 void
3597 assert_no_libc_locks_held(void)
3598 {
3599 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
3600 }
3601 #endif
3602 
3603 /* protected by link_lock */
3604 uint64_t spin_lock_spin;
3605 uint64_t spin_lock_spin2;
3606 uint64_t spin_lock_sleep;
3607 uint64_t spin_lock_wakeup;
3608 
3609 /*
3610  * Record spin lock statistics.
3611  * Called by a thread exiting itself in thrp_exit().
3612  * Also called via atexit() from the thread calling
3613  * exit() to do all the other threads as well.
3614  */
3615 void
3616 record_spin_locks(ulwp_t *ulwp)
3617 {
3618 	spin_lock_spin += ulwp->ul_spin_lock_spin;
3619 	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
3620 	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
3621 	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
3622 	ulwp->ul_spin_lock_spin = 0;
3623 	ulwp->ul_spin_lock_spin2 = 0;
3624 	ulwp->ul_spin_lock_sleep = 0;
3625 	ulwp->ul_spin_lock_wakeup = 0;
3626 }
3627 
3628 /*
3629  * atexit function:  dump the queue statistics to stderr.
3630  */
3631 #if !defined(__lint)
3632 #define	fprintf	_fprintf
3633 #endif
3634 #include <stdio.h>
3635 void
3636 dump_queue_statistics(void)
3637 {
3638 	uberdata_t *udp = curthread->ul_uberdata;
3639 	queue_head_t *qp;
3640 	int qn;
3641 	uint64_t spin_lock_total = 0;
3642 
3643 	if (udp->queue_head == NULL || thread_queue_dump == 0)
3644 		return;
3645 
3646 	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
3647 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3648 		return;
3649 	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
3650 		if (qp->qh_lockcount == 0)
3651 			continue;
3652 		spin_lock_total += qp->qh_lockcount;
3653 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3654 		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3655 			return;
3656 	}
3657 
3658 	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
3659 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3660 		return;
3661 	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
3662 		if (qp->qh_lockcount == 0)
3663 			continue;
3664 		spin_lock_total += qp->qh_lockcount;
3665 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3666 		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3667 			return;
3668 	}
3669 
3670 	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
3671 	    (u_longlong_t)spin_lock_total);
3672 	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
3673 	    (u_longlong_t)spin_lock_spin);
3674 	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
3675 	    (u_longlong_t)spin_lock_spin2);
3676 	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
3677 	    (u_longlong_t)spin_lock_sleep);
3678 	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
3679 	    (u_longlong_t)spin_lock_wakeup);
3680 }
3681