xref: /titanic_52/usr/src/lib/libc/port/threads/thr.c (revision 2d08521bd15501c8370ba2153b9cca4f094979d0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
27  */
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <pthread.h>
32 #include <procfs.h>
33 #include <sys/uio.h>
34 #include <ctype.h>
35 #include "libc.h"
36 
37 /*
38  * These symbols should not be exported from libc, but
39  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
40  * Also, some older versions of the Studio compiler/debugger
41  * components reference them.  These need to be fixed, too.
42  */
43 #pragma weak _thr_main = thr_main
44 #pragma weak _thr_create = thr_create
45 #pragma weak _thr_join = thr_join
46 #pragma weak _thr_self = thr_self
47 
/*
 * Drop any macro definition of errno so that the declaration below
 * names a plain global int.
 * NOTE(review): presumably this is the process-wide errno of the initial
 * thread, as opposed to the per-thread ul_errno that _thrp_create()
 * points ul_errnop at -- confirm against the errno header.
 */
48 #undef errno
49 extern int errno;
50 
51 /*
52  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
53  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
54  * system used it illegally (it is a consolidation private symbol).
55  * To accommodate this and possibly other abusers of the symbol,
56  * we make it always equal to 1 now that libthread has been folded
57  * into libc.  The new __libc_threaded symbol is used to indicate
58  * the new meaning, "more than one thread exists".
59  */
60 int __threaded = 1;		/* always equal to 1 */
/* _thrp_create() stores 1 here, under link_lock, once a new thread is linked */
61 int __libc_threaded = 0;	/* zero until first thr_create() */
62 
63 /*
64  * thr_concurrency and pthread_concurrency are not used by the library.
65  * They exist solely to hold and return the values set by calls to
66  * thr_setconcurrency() and pthread_setconcurrency().
67  * Because thr_concurrency is affected by the THR_NEW_LWP flag
68  * to thr_create(), thr_concurrency is protected by link_lock.
69  */
/*
 * thr_concurrency is incremented in _thrp_create() when THR_NEW_LWP is
 * requested and decremented in _thrp_exit() when the exiting thread was
 * created with THR_NEW_LWP; both updates are made with link_lock held.
 */
70 static	int	thr_concurrency = 1;
71 static	int	pthread_concurrency;
72 
#define	HASHTBLSZ	1024	/* must be a power of two */

/*
 * Map a thread id to an index into (udp)->thr_hash_table.
 * hash_mask is (hash_size - 1), so the result is always a valid index.
 * Both arguments are parenthesized so that an expression argument
 * (for example "a | b") is hashed as a whole rather than being split
 * by the precedence of '&'.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
75 
76 /* initial allocation, just enough for one lwp */
/*
 * This one-entry table is what __uberdata.thr_hash_table points to in
 * the static initializer, together with hash_size 1 and hash_mask 0,
 * so TIDHASH() maps every thread id to bucket 0 while it is in use.
 * _thrp_create() calls finish_init() while hash_size is still 1.
 * NOTE(review): presumably finish_init() installs the full HASHTBLSZ
 * table -- confirm in finish_init().
 * The entry is initialized positionally: a default mutex (the
 * hash_lock), a default condition variable and an empty hash_bucket.
 */
77 #pragma align 64(init_hash_table)
78 thr_hash_table_t init_hash_table[1] = {
79 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
80 };
81 
/* NOTE(review): definition of rtld_funcs is not visible in this part of the file */
82 extern const Lc_interface rtld_funcs[];
83 
84 /*
85  * The weak version is known to libc_db and mdb.
86  */
87 #pragma weak _uberdata = __uberdata
/*
 * Static initial value of the uberdata structure; every thread reaches
 * it through its ul_uberdata pointer (see _thrp_create()).
 *
 * The initializer is positional: each entry is matched to a member of
 * uberdata_t purely by its position, and the trailing comments name the
 * member each entry is meant for.  Any change to the member order of
 * uberdata_t must be mirrored here.
 *
 * The hash table starts out as the single-bucket init_hash_table
 * (hash_size 1, hash_mask 0); _thrp_create() calls finish_init() for as
 * long as hash_size is still 1.
 */
88 uberdata_t __uberdata = {
89 	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
90 	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
91 	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
92 	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
93 	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
94 	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
95 	{ 0, },				/* tdb_hash_lock_stats */
96 	{ { 0 }, },			/* siguaction[NSIG] */
97 	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
98 	{ DEFAULTMUTEX, NULL, 0 },
99 	{ DEFAULTMUTEX, NULL, 0 },
100 	{ DEFAULTMUTEX, NULL, 0 },
101 	{ DEFAULTMUTEX, NULL, 0 },
102 	{ DEFAULTMUTEX, NULL, 0 },
103 	{ DEFAULTMUTEX, NULL, 0 },
104 	{ DEFAULTMUTEX, NULL, 0 },
105 	{ DEFAULTMUTEX, NULL, 0 },
106 	{ DEFAULTMUTEX, NULL, 0 }},
107 	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
108 	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
109 	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
110 	0,			/* primary_map */
111 	0,			/* bucket_init */
112 	0,			/* pad[0] */
113 	0,			/* pad[1] */
114 	{ 0 },			/* uberflags */
115 	NULL,			/* queue_head */
116 	init_hash_table,	/* thr_hash_table */
117 	1,			/* hash_size: size of the hash table */
118 	0,			/* hash_mask: hash_size - 1 */
119 	NULL,			/* ulwp_one */
120 	NULL,			/* all_lwps */
121 	NULL,			/* all_zombies */
122 	0,			/* nthreads */
123 	0,			/* nzombies */
124 	0,			/* ndaemons */
125 	0,			/* pid */
126 	sigacthandler,		/* sigacthandler */
127 	NULL,			/* lwp_stacks */
128 	NULL,			/* lwp_laststack */
129 	0,			/* nfreestack */
130 	10,			/* thread_stack_cache: stack cache limit */
131 	NULL,			/* ulwp_freelist */
132 	NULL,			/* ulwp_lastfree */
133 	NULL,			/* ulwp_replace_free */
134 	NULL,			/* ulwp_replace_last */
135 	NULL,			/* atforklist */
136 	NULL,			/* robustlocks */
137 	NULL,			/* robustlist */
138 	NULL,			/* progname */
139 	NULL,			/* __tdb_bootstrap */
140 	{			/* tdb */
141 		NULL,		/* tdb_sync_addr_hash */
142 		0,		/* tdb_register_count */
143 		0,		/* tdb_hash_alloc_failed */
144 		NULL,		/* tdb_sync_addr_free */
145 		NULL,		/* tdb_sync_addr_last */
146 		0,		/* tdb_sync_alloc */
147 		{ 0, 0 },	/* tdb_ev_global_mask */
148 		tdb_events,	/* tdb_events array */
149 	},
150 };
151 
152 /*
153  * The weak version is known to libc_db and mdb.
154  */
155 #pragma weak _tdb_bootstrap = __tdb_bootstrap
156 uberdata_t **__tdb_bootstrap = NULL;
157 
/*
 * File-scope defaults for the thread library tunables.
 * _thrp_create() does not read these directly: a new thread inherits
 * the per-thread ul_queue_fifo, ul_cond_wait_defer, ul_error_detection,
 * ul_async_safe, ul_door_noreserve and ul_misaligned values of its
 * creator.  thread_stack_cache carries the same default (10) as the
 * thread_stack_cache entry of the __uberdata initializer, and
 * thread_queue_dump gates collect_queue_statistics().
 * NOTE(review): presumably these are overridden from the environment
 * during libc initialization -- confirm where they are consumed.
 */
158 int	thread_queue_fifo = 4;
159 int	thread_queue_dump = 0;
160 int	thread_cond_wait_defer = 0;
161 int	thread_error_detection = 0;
162 int	thread_async_safe = 0;
163 int	thread_stack_cache = 10;
164 int	thread_door_noreserve = 0;
165 int	thread_locks_misaligned = 0;
166 
/* forward declarations; find_stack() calls ulwp_alloc() before its definition */
167 static	ulwp_t	*ulwp_alloc(void);
168 static	void	ulwp_free(ulwp_t *);
169 
170 /*
171  * Insert the lwp into the hash table.
172  */
173 void
174 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
175 {
176 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
177 	udp->thr_hash_table[ix].hash_bucket = ulwp;
178 	ulwp->ul_ix = ix;
179 }
180 
181 void
182 hash_in(ulwp_t *ulwp, uberdata_t *udp)
183 {
184 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
185 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
186 
187 	lmutex_lock(mp);
188 	hash_in_unlocked(ulwp, ix, udp);
189 	lmutex_unlock(mp);
190 }
191 
192 /*
193  * Delete the lwp from the hash table.
194  */
195 void
196 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
197 {
198 	ulwp_t **ulwpp;
199 
200 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
201 	    ulwp != *ulwpp;
202 	    ulwpp = &(*ulwpp)->ul_hash)
203 		;
204 	*ulwpp = ulwp->ul_hash;
205 	ulwp->ul_hash = NULL;
206 	ulwp->ul_ix = -1;
207 }
208 
209 void
210 hash_out(ulwp_t *ulwp, uberdata_t *udp)
211 {
212 	int ix;
213 
214 	if ((ix = ulwp->ul_ix) >= 0) {
215 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
216 
217 		lmutex_lock(mp);
218 		hash_out_unlocked(ulwp, ix, udp);
219 		lmutex_unlock(mp);
220 	}
221 }
222 
223 /*
224  * Retain stack information for thread structures that are being recycled for
225  * new threads.  All other members of the thread structure should be zeroed.
226  */
227 static void
228 ulwp_clean(ulwp_t *ulwp)
229 {
230 	caddr_t stk = ulwp->ul_stk;
231 	size_t mapsiz = ulwp->ul_mapsiz;
232 	size_t guardsize = ulwp->ul_guardsize;
233 	uintptr_t stktop = ulwp->ul_stktop;
234 	size_t stksiz = ulwp->ul_stksiz;
235 
236 	(void) memset(ulwp, 0, sizeof (*ulwp));
237 
238 	ulwp->ul_stk = stk;
239 	ulwp->ul_mapsiz = mapsiz;
240 	ulwp->ul_guardsize = guardsize;
241 	ulwp->ul_stktop = stktop;
242 	ulwp->ul_stksiz = stksiz;
243 }
244 
245 static int stackprot;
246 
247 /*
248  * Answer the question, "Is the lwp in question really dead?"
249  * We must inquire of the operating system to be really sure
250  * because the lwp may have called lwp_exit() but it has not
251  * yet completed the exit.
252  */
253 static int
254 dead_and_buried(ulwp_t *ulwp)
255 {
256 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
257 		return (1);
258 	if (ulwp->ul_dead && ulwp->ul_detached &&
259 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
260 		ulwp->ul_lwpid = (lwpid_t)(-1);
261 		return (1);
262 	}
263 	return (0);
264 }
265 
266 /*
267  * Attempt to keep the stack cache within the specified cache limit.
268  */
269 static void
270 trim_stack_cache(int cache_limit)
271 {
272 	ulwp_t *self = curthread;
273 	uberdata_t *udp = self->ul_uberdata;
274 	ulwp_t *prev = NULL;
275 	ulwp_t **ulwpp = &udp->lwp_stacks;
276 	ulwp_t *ulwp;
277 
278 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
279 
280 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
281 		if (dead_and_buried(ulwp)) {
282 			*ulwpp = ulwp->ul_next;
283 			if (ulwp == udp->lwp_laststack)
284 				udp->lwp_laststack = prev;
285 			hash_out(ulwp, udp);
286 			udp->nfreestack--;
287 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
288 			/*
289 			 * Now put the free ulwp on the ulwp freelist.
290 			 */
291 			ulwp->ul_mapsiz = 0;
292 			ulwp->ul_next = NULL;
293 			if (udp->ulwp_freelist == NULL)
294 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
295 			else {
296 				udp->ulwp_lastfree->ul_next = ulwp;
297 				udp->ulwp_lastfree = ulwp;
298 			}
299 		} else {
300 			prev = ulwp;
301 			ulwpp = &ulwp->ul_next;
302 		}
303 	}
304 }
305 
306 /*
307  * Find an unused stack of the requested size
308  * or create a new stack of the requested size.
309  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
310  * thr_exit() stores 1 in the ul_dead member.
311  * thr_join() stores -1 in the ul_lwpid member.
312  */
313 static ulwp_t *
314 find_stack(size_t stksize, size_t guardsize)
315 {
316 	static size_t pagesize = 0;
317 
318 	uberdata_t *udp = curthread->ul_uberdata;
319 	size_t mapsize;
320 	ulwp_t *prev;
321 	ulwp_t *ulwp;
322 	ulwp_t **ulwpp;
323 	void *stk;
324 
325 	/*
326 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
327 	 * unless overridden by the system's configuration.
328 	 */
329 	if (stackprot == 0) {	/* do this once */
330 		long lprot = _sysconf(_SC_STACK_PROT);
331 		if (lprot <= 0)
332 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
333 		stackprot = (int)lprot;
334 	}
335 	if (pagesize == 0)	/* do this once */
336 		pagesize = _sysconf(_SC_PAGESIZE);
337 
338 	/*
339 	 * One megabyte stacks by default, but subtract off
340 	 * two pages for the system-created red zones.
341 	 * Round up a non-zero stack size to a pagesize multiple.
342 	 */
343 	if (stksize == 0)
344 		stksize = DEFAULTSTACK - 2 * pagesize;
345 	else
346 		stksize = ((stksize + pagesize - 1) & -pagesize);
347 
348 	/*
349 	 * Round up the mapping size to a multiple of pagesize.
350 	 * Note: mmap() provides at least one page of red zone
351 	 * so we deduct that from the value of guardsize.
352 	 */
353 	if (guardsize != 0)
354 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
355 	mapsize = stksize + guardsize;
356 
357 	lmutex_lock(&udp->link_lock);
358 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
359 	    (ulwp = *ulwpp) != NULL;
360 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
361 		if (ulwp->ul_mapsiz == mapsize &&
362 		    ulwp->ul_guardsize == guardsize &&
363 		    dead_and_buried(ulwp)) {
364 			/*
365 			 * The previous lwp is gone; reuse the stack.
366 			 * Remove the ulwp from the stack list.
367 			 */
368 			*ulwpp = ulwp->ul_next;
369 			ulwp->ul_next = NULL;
370 			if (ulwp == udp->lwp_laststack)
371 				udp->lwp_laststack = prev;
372 			hash_out(ulwp, udp);
373 			udp->nfreestack--;
374 			lmutex_unlock(&udp->link_lock);
375 			ulwp_clean(ulwp);
376 			return (ulwp);
377 		}
378 	}
379 
380 	/*
381 	 * None of the cached stacks matched our mapping size.
382 	 * Reduce the stack cache to get rid of possibly
383 	 * very old stacks that will never be reused.
384 	 */
385 	if (udp->nfreestack > udp->thread_stack_cache)
386 		trim_stack_cache(udp->thread_stack_cache);
387 	else if (udp->nfreestack > 0)
388 		trim_stack_cache(udp->nfreestack - 1);
389 	lmutex_unlock(&udp->link_lock);
390 
391 	/*
392 	 * Create a new stack.
393 	 */
394 	if ((stk = mmap(NULL, mapsize, stackprot,
395 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
396 		/*
397 		 * We have allocated our stack.  Now allocate the ulwp.
398 		 */
399 		ulwp = ulwp_alloc();
400 		if (ulwp == NULL)
401 			(void) munmap(stk, mapsize);
402 		else {
403 			ulwp->ul_stk = stk;
404 			ulwp->ul_mapsiz = mapsize;
405 			ulwp->ul_guardsize = guardsize;
406 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
407 			ulwp->ul_stksiz = stksize;
408 			if (guardsize)	/* protect the extra red zone */
409 				(void) mprotect(stk, guardsize, PROT_NONE);
410 		}
411 	}
412 	return (ulwp);
413 }
414 
415 /*
416  * Get a ulwp_t structure from the free list or allocate a new one.
417  * Such ulwp_t's do not have a stack allocated by the library.
418  */
419 static ulwp_t *
420 ulwp_alloc(void)
421 {
422 	ulwp_t *self = curthread;
423 	uberdata_t *udp = self->ul_uberdata;
424 	size_t tls_size;
425 	ulwp_t *prev;
426 	ulwp_t *ulwp;
427 	ulwp_t **ulwpp;
428 	caddr_t data;
429 
430 	lmutex_lock(&udp->link_lock);
431 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
432 	    (ulwp = *ulwpp) != NULL;
433 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
434 		if (dead_and_buried(ulwp)) {
435 			*ulwpp = ulwp->ul_next;
436 			ulwp->ul_next = NULL;
437 			if (ulwp == udp->ulwp_lastfree)
438 				udp->ulwp_lastfree = prev;
439 			hash_out(ulwp, udp);
440 			lmutex_unlock(&udp->link_lock);
441 			ulwp_clean(ulwp);
442 			return (ulwp);
443 		}
444 	}
445 	lmutex_unlock(&udp->link_lock);
446 
447 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
448 	data = lmalloc(sizeof (*ulwp) + tls_size);
449 	if (data != NULL) {
450 		/* LINTED pointer cast may result in improper alignment */
451 		ulwp = (ulwp_t *)(data + tls_size);
452 	}
453 	return (ulwp);
454 }
455 
456 /*
457  * Free a ulwp structure.
458  * If there is an associated stack, put it on the stack list and
459  * munmap() previously freed stacks up to the residual cache limit.
460  * Else put it on the ulwp free list and never call lfree() on it.
461  */
462 static void
463 ulwp_free(ulwp_t *ulwp)
464 {
465 	uberdata_t *udp = curthread->ul_uberdata;
466 
467 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
468 	ulwp->ul_next = NULL;
469 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
470 		/*EMPTY*/;
471 	else if (ulwp->ul_mapsiz != 0) {
472 		if (udp->lwp_stacks == NULL)
473 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
474 		else {
475 			udp->lwp_laststack->ul_next = ulwp;
476 			udp->lwp_laststack = ulwp;
477 		}
478 		if (++udp->nfreestack > udp->thread_stack_cache)
479 			trim_stack_cache(udp->thread_stack_cache);
480 	} else {
481 		if (udp->ulwp_freelist == NULL)
482 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
483 		else {
484 			udp->ulwp_lastfree->ul_next = ulwp;
485 			udp->ulwp_lastfree = ulwp;
486 		}
487 	}
488 }
489 
490 /*
491  * Find a named lwp and return a pointer to its hash list location.
492  * On success, returns with the hash lock held.
493  */
494 ulwp_t **
495 find_lwpp(thread_t tid)
496 {
497 	uberdata_t *udp = curthread->ul_uberdata;
498 	int ix = TIDHASH(tid, udp);
499 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
500 	ulwp_t *ulwp;
501 	ulwp_t **ulwpp;
502 
503 	if (tid == 0)
504 		return (NULL);
505 
506 	lmutex_lock(mp);
507 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
508 	    (ulwp = *ulwpp) != NULL;
509 	    ulwpp = &ulwp->ul_hash) {
510 		if (ulwp->ul_lwpid == tid)
511 			return (ulwpp);
512 	}
513 	lmutex_unlock(mp);
514 	return (NULL);
515 }
516 
517 /*
518  * Wake up all lwps waiting on this lwp for some reason.
519  */
520 void
521 ulwp_broadcast(ulwp_t *ulwp)
522 {
523 	ulwp_t *self = curthread;
524 	uberdata_t *udp = self->ul_uberdata;
525 
526 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
527 	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
528 }
529 
530 /*
531  * Find a named lwp and return a pointer to it.
532  * Returns with the hash lock held.
533  */
534 ulwp_t *
535 find_lwp(thread_t tid)
536 {
537 	ulwp_t *self = curthread;
538 	uberdata_t *udp = self->ul_uberdata;
539 	ulwp_t *ulwp = NULL;
540 	ulwp_t **ulwpp;
541 
542 	if (self->ul_lwpid == tid) {
543 		ulwp = self;
544 		ulwp_lock(ulwp, udp);
545 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
546 		ulwp = *ulwpp;
547 	}
548 
549 	if (ulwp && ulwp->ul_dead) {
550 		ulwp_unlock(ulwp, udp);
551 		ulwp = NULL;
552 	}
553 
554 	return (ulwp);
555 }
556 
557 int
558 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
559 	long flags, thread_t *new_thread, size_t guardsize)
560 {
561 	ulwp_t *self = curthread;
562 	uberdata_t *udp = self->ul_uberdata;
563 	ucontext_t uc;
564 	uint_t lwp_flags;
565 	thread_t tid;
566 	int error;
567 	ulwp_t *ulwp;
568 
569 	/*
570 	 * Enforce the restriction of not creating any threads
571 	 * until the primary link map has been initialized.
572 	 * Also, disallow thread creation to a child of vfork().
573 	 */
574 	if (!self->ul_primarymap || self->ul_vfork)
575 		return (ENOTSUP);
576 
577 	if (udp->hash_size == 1)
578 		finish_init();
579 
580 	if ((stk || stksize) && stksize < MINSTACK)
581 		return (EINVAL);
582 
583 	if (stk == NULL) {
584 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
585 			return (ENOMEM);
586 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
587 	} else {
588 		/* initialize the private stack */
589 		if ((ulwp = ulwp_alloc()) == NULL)
590 			return (ENOMEM);
591 		ulwp->ul_stk = stk;
592 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
593 		ulwp->ul_stksiz = stksize;
594 	}
595 	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
596 	ulwp->ul_ix = -1;
597 	ulwp->ul_errnop = &ulwp->ul_errno;
598 
599 	lwp_flags = LWP_SUSPENDED;
600 	if (flags & (THR_DETACHED|THR_DAEMON)) {
601 		flags |= THR_DETACHED;
602 		lwp_flags |= LWP_DETACHED;
603 	}
604 	if (flags & THR_DAEMON)
605 		lwp_flags |= LWP_DAEMON;
606 
607 	/* creating a thread: enforce mt-correctness in mutex_lock() */
608 	self->ul_async_safe = 1;
609 
610 	/* per-thread copies of global variables, for speed */
611 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
612 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
613 	ulwp->ul_error_detection = self->ul_error_detection;
614 	ulwp->ul_async_safe = self->ul_async_safe;
615 	ulwp->ul_max_spinners = self->ul_max_spinners;
616 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
617 	ulwp->ul_queue_spin = self->ul_queue_spin;
618 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
619 	ulwp->ul_misaligned = self->ul_misaligned;
620 
621 	/* new thread inherits creating thread's scheduling parameters */
622 	ulwp->ul_policy = self->ul_policy;
623 	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
624 	ulwp->ul_cid = self->ul_cid;
625 	ulwp->ul_rtclassid = self->ul_rtclassid;
626 
627 	ulwp->ul_primarymap = self->ul_primarymap;
628 	ulwp->ul_self = ulwp;
629 	ulwp->ul_uberdata = udp;
630 
631 	/* debugger support */
632 	ulwp->ul_usropts = flags;
633 
634 #ifdef __sparc
635 	/*
636 	 * We cache several instructions in the thread structure for use
637 	 * by the fasttrap DTrace provider. When changing this, read the
638 	 * comment in fasttrap.h for the all the other places that must
639 	 * be changed.
640 	 */
641 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
642 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
643 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
644 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
645 #endif
646 
647 	ulwp->ul_startpc = func;
648 	ulwp->ul_startarg = arg;
649 	_fpinherit(ulwp);
650 	/*
651 	 * Defer signals on the new thread until its TLS constructors
652 	 * have been called.  _thrp_setup() will call sigon() after
653 	 * it has called tls_setup().
654 	 */
655 	ulwp->ul_sigdefer = 1;
656 
657 	error = setup_context(&uc, _thrp_setup, ulwp,
658 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
659 	if (error != 0 && stk != NULL)	/* inaccessible stack */
660 		error = EFAULT;
661 
662 	/*
663 	 * Call enter_critical() to avoid being suspended until we
664 	 * have linked the new thread into the proper lists.
665 	 * This is necessary because forkall() and fork1() must
666 	 * suspend all threads and they must see a complete list.
667 	 */
668 	enter_critical(self);
669 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
670 	if (error != 0 ||
671 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
672 		exit_critical(self);
673 		ulwp->ul_lwpid = (lwpid_t)(-1);
674 		ulwp->ul_dead = 1;
675 		ulwp->ul_detached = 1;
676 		lmutex_lock(&udp->link_lock);
677 		ulwp_free(ulwp);
678 		lmutex_unlock(&udp->link_lock);
679 		return (error);
680 	}
681 	self->ul_nocancel = 0;	/* cancellation is now possible */
682 	udp->uberflags.uf_mt = 1;
683 	if (new_thread)
684 		*new_thread = tid;
685 	if (flags & THR_DETACHED)
686 		ulwp->ul_detached = 1;
687 	ulwp->ul_lwpid = tid;
688 	ulwp->ul_stop = TSTP_REGULAR;
689 	if (flags & THR_SUSPENDED)
690 		ulwp->ul_created = 1;
691 
692 	lmutex_lock(&udp->link_lock);
693 	ulwp->ul_forw = udp->all_lwps;
694 	ulwp->ul_back = udp->all_lwps->ul_back;
695 	ulwp->ul_back->ul_forw = ulwp;
696 	ulwp->ul_forw->ul_back = ulwp;
697 	hash_in(ulwp, udp);
698 	udp->nthreads++;
699 	if (flags & THR_DAEMON)
700 		udp->ndaemons++;
701 	if (flags & THR_NEW_LWP)
702 		thr_concurrency++;
703 	__libc_threaded = 1;		/* inform stdio */
704 	lmutex_unlock(&udp->link_lock);
705 
706 	if (__td_event_report(self, TD_CREATE, udp)) {
707 		self->ul_td_evbuf.eventnum = TD_CREATE;
708 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
709 		tdb_event(TD_CREATE, udp);
710 	}
711 
712 	exit_critical(self);
713 
714 	if (!(flags & THR_SUSPENDED))
715 		(void) _thrp_continue(tid, TSTP_REGULAR);
716 
717 	return (0);
718 }
719 
720 int
721 thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
722 	long flags, thread_t *new_thread)
723 {
724 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
725 }
726 
/*
 * Special cancellation cleanup hook for DCE.
 * While this pointer is not NULL it refers to a callback that is
 * invoked when a thread terminates in thr_exit() as the result of
 * having been cancelled; see _thrp_exit_common().
 */
static void (*cleanuphndlr)(void) = NULL;

/*
 * _pthread_setcleanupinit: install func as the DCE cleanup hook,
 * replacing any hook installed earlier.  Passing NULL removes the
 * hook.  Always returns 0.
 */
int
_pthread_setcleanupinit(void (*func)(void))
{
	cleanuphndlr = func;

	return (0);
}
744 
745 void
746 _thrp_exit()
747 {
748 	ulwp_t *self = curthread;
749 	uberdata_t *udp = self->ul_uberdata;
750 	ulwp_t *replace = NULL;
751 
752 	if (__td_event_report(self, TD_DEATH, udp)) {
753 		self->ul_td_evbuf.eventnum = TD_DEATH;
754 		tdb_event(TD_DEATH, udp);
755 	}
756 
757 	ASSERT(self->ul_sigdefer != 0);
758 
759 	lmutex_lock(&udp->link_lock);
760 	udp->nthreads--;
761 	if (self->ul_usropts & THR_NEW_LWP)
762 		thr_concurrency--;
763 	if (self->ul_usropts & THR_DAEMON)
764 		udp->ndaemons--;
765 	else if (udp->nthreads == udp->ndaemons) {
766 		/*
767 		 * We are the last non-daemon thread exiting.
768 		 * Exit the process.  We retain our TSD and TLS so
769 		 * that atexit() application functions can use them.
770 		 */
771 		lmutex_unlock(&udp->link_lock);
772 		exit(0);
773 		thr_panic("_thrp_exit(): exit(0) returned");
774 	}
775 	lmutex_unlock(&udp->link_lock);
776 
777 	tmem_exit();		/* deallocate tmem allocations */
778 	tsd_exit();		/* deallocate thread-specific data */
779 	tls_exit();		/* deallocate thread-local storage */
780 	heldlock_exit();	/* deal with left-over held locks */
781 
782 	/* block all signals to finish exiting */
783 	block_all_signals(self);
784 	/* also prevent ourself from being suspended */
785 	enter_critical(self);
786 	rwl_free(self);
787 	lmutex_lock(&udp->link_lock);
788 	ulwp_free(self);
789 	(void) ulwp_lock(self, udp);
790 
791 	if (self->ul_mapsiz && !self->ul_detached) {
792 		/*
793 		 * We want to free the stack for reuse but must keep
794 		 * the ulwp_t struct for the benefit of thr_join().
795 		 * For this purpose we allocate a replacement ulwp_t.
796 		 */
797 		if ((replace = udp->ulwp_replace_free) == NULL)
798 			replace = lmalloc(REPLACEMENT_SIZE);
799 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
800 			udp->ulwp_replace_last = NULL;
801 	}
802 
803 	if (udp->all_lwps == self)
804 		udp->all_lwps = self->ul_forw;
805 	if (udp->all_lwps == self)
806 		udp->all_lwps = NULL;
807 	else {
808 		self->ul_forw->ul_back = self->ul_back;
809 		self->ul_back->ul_forw = self->ul_forw;
810 	}
811 	self->ul_forw = self->ul_back = NULL;
812 #if defined(THREAD_DEBUG)
813 	/* collect queue lock statistics before marking ourself dead */
814 	record_spin_locks(self);
815 #endif
816 	self->ul_dead = 1;
817 	self->ul_pleasestop = 0;
818 	if (replace != NULL) {
819 		int ix = self->ul_ix;		/* the hash index */
820 		(void) memcpy(replace, self, REPLACEMENT_SIZE);
821 		replace->ul_self = replace;
822 		replace->ul_next = NULL;	/* clone not on stack list */
823 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
824 		replace->ul_replace = 1;	/* requires clone to be freed */
825 		hash_out_unlocked(self, ix, udp);
826 		hash_in_unlocked(replace, ix, udp);
827 		ASSERT(!(self->ul_detached));
828 		self->ul_detached = 1;		/* this frees the stack */
829 		self->ul_schedctl = NULL;
830 		self->ul_schedctl_called = &udp->uberflags;
831 		set_curthread(self = replace);
832 		/*
833 		 * Having just changed the address of curthread, we
834 		 * must reset the ownership of the locks we hold so
835 		 * that assertions will not fire when we release them.
836 		 */
837 		udp->link_lock.mutex_owner = (uintptr_t)self;
838 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
839 		/*
840 		 * NOTE:
841 		 * On i386, %gs still references the original, not the
842 		 * replacement, ulwp structure.  Fetching the replacement
843 		 * curthread pointer via %gs:0 works correctly since the
844 		 * original ulwp structure will not be reallocated until
845 		 * this lwp has completed its lwp_exit() system call (see
846 		 * dead_and_buried()), but from here on out, we must make
847 		 * no references to %gs:<offset> other than %gs:0.
848 		 */
849 	}
850 	/*
851 	 * Put non-detached terminated threads in the all_zombies list.
852 	 */
853 	if (!self->ul_detached) {
854 		udp->nzombies++;
855 		if (udp->all_zombies == NULL) {
856 			ASSERT(udp->nzombies == 1);
857 			udp->all_zombies = self->ul_forw = self->ul_back = self;
858 		} else {
859 			self->ul_forw = udp->all_zombies;
860 			self->ul_back = udp->all_zombies->ul_back;
861 			self->ul_back->ul_forw = self;
862 			self->ul_forw->ul_back = self;
863 		}
864 	}
865 	/*
866 	 * Notify everyone waiting for this thread.
867 	 */
868 	ulwp_broadcast(self);
869 	(void) ulwp_unlock(self, udp);
870 	/*
871 	 * Prevent any more references to the schedctl data.
872 	 * We are exiting and continue_fork() may not find us.
873 	 * Do this just before dropping link_lock, since fork
874 	 * serializes on link_lock.
875 	 */
876 	self->ul_schedctl = NULL;
877 	self->ul_schedctl_called = &udp->uberflags;
878 	lmutex_unlock(&udp->link_lock);
879 
880 	ASSERT(self->ul_critical == 1);
881 	ASSERT(self->ul_preempt == 0);
882 	_lwp_terminate();	/* never returns */
883 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
884 }
885 
#if defined(THREAD_DEBUG)
/*
 * When the thread_queue_dump tunable is set, call record_spin_locks()
 * for every thread on the circular all_lwps list.  The list is walked
 * with link_lock held.  Does nothing when thread_queue_dump is zero.
 */
void
collect_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;

	if (thread_queue_dump) {
		lmutex_lock(&udp->link_lock);
		if ((ulwp = udp->all_lwps) != NULL) {
			/* one full trip around the circular list */
			do {
				record_spin_locks(ulwp);
			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
		}
		lmutex_unlock(&udp->link_lock);
	}
}
#endif
904 
905 static void __NORETURN
906 _thrp_exit_common(void *status, int unwind)
907 {
908 	ulwp_t *self = curthread;
909 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
910 
911 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
912 
913 	/*
914 	 * Disable cancellation and call the special DCE cancellation
915 	 * cleanup hook if it is enabled.  Do nothing else before calling
916 	 * the DCE cancellation cleanup hook; it may call longjmp() and
917 	 * never return here.
918 	 */
919 	self->ul_cancel_disabled = 1;
920 	self->ul_cancel_async = 0;
921 	self->ul_save_async = 0;
922 	self->ul_cancelable = 0;
923 	self->ul_cancel_pending = 0;
924 	set_cancel_pending_flag(self, 1);
925 	if (cancelled && cleanuphndlr != NULL)
926 		(*cleanuphndlr)();
927 
928 	/*
929 	 * Block application signals while we are exiting.
930 	 * We call out to C++, TSD, and TLS destructors while exiting
931 	 * and these are application-defined, so we cannot be assured
932 	 * that they won't reset the signal mask.  We use sigoff() to
933 	 * defer any signals that may be received as a result of this
934 	 * bad behavior.  Such signals will be lost to the process
935 	 * when the thread finishes exiting.
936 	 */
937 	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
938 	sigoff(self);
939 
940 	self->ul_rval = status;
941 
942 	/*
943 	 * If thr_exit is being called from the places where
944 	 * C++ destructors are to be called such as cancellation
945 	 * points, then set this flag. It is checked in _t_cancel()
946 	 * to decide whether _ex_unwind() is to be called or not.
947 	 */
948 	if (unwind)
949 		self->ul_unwind = 1;
950 
951 	/*
952 	 * _thrp_unwind() will eventually call _thrp_exit().
953 	 * It never returns.
954 	 */
955 	_thrp_unwind(NULL);
956 	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");
957 
958 	for (;;)	/* to shut the compiler up about __NORETURN */
959 		continue;
960 }
961 
/*
 * Exit path for a thread that returns from its start function.
 * The thread is already at the top of its stack, so the common exit
 * code is asked not to unwind.
 */
void
_thrp_terminate(void *status)
{
	const int unwind = 0;

	_thrp_exit_common(status, unwind);
}
971 
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
/*
 * Terminate the calling thread with the given exit status.
 * pthread_exit() and _thr_exit() are weak aliases of this function.
 * The common exit code is asked to unwind the stack.
 */
void
thr_exit(void *status)
{
	const int unwind = 1;

	_thrp_exit_common(status, unwind);
}
979 
980 int
981 _thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
982 {
983 	uberdata_t *udp = curthread->ul_uberdata;
984 	mutex_t *mp;
985 	void *rval;
986 	thread_t found;
987 	ulwp_t *ulwp;
988 	ulwp_t **ulwpp;
989 	int replace;
990 	int error;
991 
992 	if (do_cancel)
993 		error = lwp_wait(tid, &found);
994 	else {
995 		while ((error = __lwp_wait(tid, &found)) == EINTR)
996 			;
997 	}
998 	if (error)
999 		return (error);
1000 
1001 	/*
1002 	 * We must hold link_lock to avoid a race condition with find_stack().
1003 	 */
1004 	lmutex_lock(&udp->link_lock);
1005 	if ((ulwpp = find_lwpp(found)) == NULL) {
1006 		/*
1007 		 * lwp_wait() found an lwp that the library doesn't know
1008 		 * about.  It must have been created with _lwp_create().
1009 		 * Just return its lwpid; we can't know its status.
1010 		 */
1011 		lmutex_unlock(&udp->link_lock);
1012 		rval = NULL;
1013 	} else {
1014 		/*
1015 		 * Remove ulwp from the hash table.
1016 		 */
1017 		ulwp = *ulwpp;
1018 		*ulwpp = ulwp->ul_hash;
1019 		ulwp->ul_hash = NULL;
1020 		/*
1021 		 * Remove ulwp from all_zombies list.
1022 		 */
1023 		ASSERT(udp->nzombies >= 1);
1024 		if (udp->all_zombies == ulwp)
1025 			udp->all_zombies = ulwp->ul_forw;
1026 		if (udp->all_zombies == ulwp)
1027 			udp->all_zombies = NULL;
1028 		else {
1029 			ulwp->ul_forw->ul_back = ulwp->ul_back;
1030 			ulwp->ul_back->ul_forw = ulwp->ul_forw;
1031 		}
1032 		ulwp->ul_forw = ulwp->ul_back = NULL;
1033 		udp->nzombies--;
1034 		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
1035 		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
1036 		/*
1037 		 * We can't call ulwp_unlock(ulwp) after we set
1038 		 * ulwp->ul_ix = -1 so we have to get a pointer to the
1039 		 * ulwp's hash table mutex now in order to unlock it below.
1040 		 */
1041 		mp = ulwp_mutex(ulwp, udp);
1042 		ulwp->ul_lwpid = (lwpid_t)(-1);
1043 		ulwp->ul_ix = -1;
1044 		rval = ulwp->ul_rval;
1045 		replace = ulwp->ul_replace;
1046 		lmutex_unlock(mp);
1047 		if (replace) {
1048 			ulwp->ul_next = NULL;
1049 			if (udp->ulwp_replace_free == NULL)
1050 				udp->ulwp_replace_free =
1051 				    udp->ulwp_replace_last = ulwp;
1052 			else {
1053 				udp->ulwp_replace_last->ul_next = ulwp;
1054 				udp->ulwp_replace_last = ulwp;
1055 			}
1056 		}
1057 		lmutex_unlock(&udp->link_lock);
1058 	}
1059 
1060 	if (departed != NULL)
1061 		*departed = found;
1062 	if (status != NULL)
1063 		*status = rval;
1064 	return (0);
1065 }
1066 
1067 int
1068 thr_join(thread_t tid, thread_t *departed, void **status)
1069 {
1070 	int error = _thrp_join(tid, departed, status, 1);
1071 	return ((error == EINVAL)? ESRCH : error);
1072 }
1073 
/*
 * pthread_join() differs from Solaris thr_join():
 * It has no "departed" argument because it does not return the
 * departed thread's id.
 * It returns EINVAL if tid refers to a detached thread.
 * A tid of zero is rejected with ESRCH without waiting.
 */
#pragma weak _pthread_join = pthread_join
int
pthread_join(pthread_t tid, void **status)
{
	if (tid == 0)
		return (ESRCH);
	return (_thrp_join(tid, NULL, status, 1));
}
1086 
1087 int
1088 pthread_detach(pthread_t tid)
1089 {
1090 	uberdata_t *udp = curthread->ul_uberdata;
1091 	ulwp_t *ulwp;
1092 	ulwp_t **ulwpp;
1093 	int error = 0;
1094 
1095 	if ((ulwpp = find_lwpp(tid)) == NULL)
1096 		return (ESRCH);
1097 	ulwp = *ulwpp;
1098 
1099 	if (ulwp->ul_dead) {
1100 		ulwp_unlock(ulwp, udp);
1101 		error = _thrp_join(tid, NULL, NULL, 0);
1102 	} else {
1103 		error = __lwp_detach(tid);
1104 		ulwp->ul_detached = 1;
1105 		ulwp->ul_usropts |= THR_DETACHED;
1106 		ulwp_unlock(ulwp, udp);
1107 	}
1108 	return (error);
1109 }
1110 
1111 static const char *
1112 ematch(const char *ev, const char *match)
1113 {
1114 	int c;
1115 
1116 	while ((c = *match++) != '\0') {
1117 		if (*ev++ != c)
1118 			return (NULL);
1119 	}
1120 	if (*ev++ != '=')
1121 		return (NULL);
1122 	return (ev);
1123 }
1124 
1125 static int
1126 envvar(const char *ev, const char *match, int limit)
1127 {
1128 	int val = -1;
1129 	const char *ename;
1130 
1131 	if ((ename = ematch(ev, match)) != NULL) {
1132 		int c;
1133 		for (val = 0; (c = *ename) != '\0'; ename++) {
1134 			if (!isdigit(c)) {
1135 				val = -1;
1136 				break;
1137 			}
1138 			val = val * 10 + (c - '0');
1139 			if (val > limit) {
1140 				val = limit;
1141 				break;
1142 			}
1143 		}
1144 	}
1145 	return (val);
1146 }
1147 
1148 static void
1149 etest(const char *ev)
1150 {
1151 	int value;
1152 
1153 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1154 		thread_queue_spin = value;
1155 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1156 		thread_adaptive_spin = value;
1157 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1158 		thread_max_spinners = value;
1159 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1160 		thread_queue_fifo = value;
1161 #if defined(THREAD_DEBUG)
1162 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1163 		thread_queue_verify = value;
1164 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1165 		thread_queue_dump = value;
1166 #endif
1167 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1168 		thread_stack_cache = value;
1169 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1170 		thread_cond_wait_defer = value;
1171 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1172 		thread_error_detection = value;
1173 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1174 		thread_async_safe = value;
1175 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1176 		thread_door_noreserve = value;
1177 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1178 		thread_locks_misaligned = value;
1179 }
1180 
/*
 * Scan the environment for variables named "_THREAD_*" and hand the
 * remainder of each one (after the prefix) to etest().
 * For compatibility with the past, names of the form "LIBTHREAD_*"
 * are treated the same way.
 * Nothing is done if there is no environment.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* cheap first-character test before the full compare */
		switch (*entry) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1204 
/* PROBE_SUPPORT begin */
/*
 * Declared weak; libc_init() compares the symbol against NULL
 * before calling it.
 */
#pragma weak __tnf_probe_notify
extern void __tnf_probe_notify(void);
/* PROBE_SUPPORT end */

/* same as atexit() but private to the library */
extern int _atexit(void (*)(void));

/* same as _cleanup() but private to the library */
extern void __cleanup(void);

/* atfork() setup; libc_init() calls this for every link map */
extern void atfork_init(void);

#ifdef __amd64
/* called at the start of libc_init() to gather cache layout information */
extern void __proc64id(void);
#endif
1221 
1222 /*
1223  * libc_init() is called by ld.so.1 for library initialization.
1224  * We perform minimal initialization; enough to work with the main thread.
1225  */
1226 void
1227 libc_init(void)
1228 {
1229 	uberdata_t *udp = &__uberdata;
1230 	ulwp_t *oldself = __curthread();
1231 	ucontext_t uc;
1232 	ulwp_t *self;
1233 	struct rlimit rl;
1234 	caddr_t data;
1235 	size_t tls_size;
1236 	int setmask;
1237 
1238 	/*
1239 	 * For the initial stage of initialization, we must be careful
1240 	 * not to call any function that could possibly call _cerror().
1241 	 * For this purpose, we call only the raw system call wrappers.
1242 	 */
1243 
1244 #ifdef __amd64
1245 	/*
1246 	 * Gather information about cache layouts for optimized
1247 	 * AMD and Intel assembler strfoo() and memfoo() functions.
1248 	 */
1249 	__proc64id();
1250 #endif
1251 
1252 	/*
1253 	 * Every libc, regardless of which link map, must register __cleanup().
1254 	 */
1255 	(void) _atexit(__cleanup);
1256 
1257 	/*
1258 	 * We keep our uberdata on one of (a) the first alternate link map
1259 	 * or (b) the primary link map.  We switch to the primary link map
1260 	 * and stay there once we see it.  All intermediate link maps are
1261 	 * subject to being unloaded at any time.
1262 	 */
1263 	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
1264 		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1265 		mutex_setup();
1266 		atfork_init();	/* every link map needs atfork() processing */
1267 		init_progname();
1268 		return;
1269 	}
1270 
1271 	/*
1272 	 * To establish the main stack information, we have to get our context.
1273 	 * This is also convenient to use for getting our signal mask.
1274 	 */
1275 	uc.uc_flags = UC_ALL;
1276 	(void) __getcontext(&uc);
1277 	ASSERT(uc.uc_link == NULL);
1278 
1279 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1280 	ASSERT(primary_link_map || tls_size == 0);
1281 	data = lmalloc(sizeof (ulwp_t) + tls_size);
1282 	if (data == NULL)
1283 		thr_panic("cannot allocate thread structure for main thread");
1284 	/* LINTED pointer cast may result in improper alignment */
1285 	self = (ulwp_t *)(data + tls_size);
1286 	init_hash_table[0].hash_bucket = self;
1287 
1288 	self->ul_sigmask = uc.uc_sigmask;
1289 	delete_reserved_signals(&self->ul_sigmask);
1290 	/*
1291 	 * Are the old and new sets different?
1292 	 * (This can happen if we are currently blocking SIGCANCEL.)
1293 	 * If so, we must explicitly set our signal mask, below.
1294 	 */
1295 	setmask =
1296 	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1297 	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
1298 	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
1299 	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));
1300 
1301 #ifdef __sparc
1302 	/*
1303 	 * We cache several instructions in the thread structure for use
1304 	 * by the fasttrap DTrace provider. When changing this, read the
1305 	 * comment in fasttrap.h for the all the other places that must
1306 	 * be changed.
1307 	 */
1308 	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1309 	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1310 	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1311 	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1312 #endif
1313 
1314 	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1315 	(void) getrlimit(RLIMIT_STACK, &rl);
1316 	self->ul_stksiz = rl.rlim_cur;
1317 	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1318 
1319 	self->ul_forw = self->ul_back = self;
1320 	self->ul_hash = NULL;
1321 	self->ul_ix = 0;
1322 	self->ul_lwpid = 1; /* _lwp_self() */
1323 	self->ul_main = 1;
1324 	self->ul_self = self;
1325 	self->ul_policy = -1;		/* initialize only when needed */
1326 	self->ul_pri = 0;
1327 	self->ul_cid = 0;
1328 	self->ul_rtclassid = -1;
1329 	self->ul_uberdata = udp;
1330 	if (oldself != NULL) {
1331 		int i;
1332 
1333 		ASSERT(primary_link_map);
1334 		ASSERT(oldself->ul_main == 1);
1335 		self->ul_stsd = oldself->ul_stsd;
1336 		for (i = 0; i < TSD_NFAST; i++)
1337 			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1338 		self->ul_tls = oldself->ul_tls;
1339 		/*
1340 		 * Retrieve all pointers to uberdata allocated
1341 		 * while running on previous link maps.
1342 		 * We would like to do a structure assignment here, but
1343 		 * gcc turns structure assignments into calls to memcpy(),
1344 		 * a function exported from libc.  We can't call any such
1345 		 * external functions until we establish curthread, below,
1346 		 * so we just call our private version of memcpy().
1347 		 */
1348 		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
1349 		/*
1350 		 * These items point to global data on the primary link map.
1351 		 */
1352 		udp->thr_hash_table = init_hash_table;
1353 		udp->sigacthandler = sigacthandler;
1354 		udp->tdb.tdb_events = tdb_events;
1355 		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1356 		ASSERT(udp->lwp_stacks == NULL);
1357 		ASSERT(udp->ulwp_freelist == NULL);
1358 		ASSERT(udp->ulwp_replace_free == NULL);
1359 		ASSERT(udp->hash_size == 1);
1360 	}
1361 	udp->all_lwps = self;
1362 	udp->ulwp_one = self;
1363 	udp->pid = getpid();
1364 	udp->nthreads = 1;
1365 	/*
1366 	 * In every link map, tdb_bootstrap points to the same piece of
1367 	 * allocated memory.  When the primary link map is initialized,
1368 	 * the allocated memory is assigned a pointer to the one true
1369 	 * uberdata.  This allows libc_db to initialize itself regardless
1370 	 * of which instance of libc it finds in the address space.
1371 	 */
1372 	if (udp->tdb_bootstrap == NULL)
1373 		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1374 	__tdb_bootstrap = udp->tdb_bootstrap;
1375 	if (primary_link_map) {
1376 		self->ul_primarymap = 1;
1377 		udp->primary_map = 1;
1378 		*udp->tdb_bootstrap = udp;
1379 	}
1380 	/*
1381 	 * Cancellation can't happen until:
1382 	 *	pthread_cancel() is called
1383 	 * or:
1384 	 *	another thread is created
1385 	 * For now, as a single-threaded process, set the flag that tells
1386 	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1387 	 */
1388 	self->ul_nocancel = 1;
1389 
1390 #if defined(__amd64)
1391 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1392 #elif defined(__i386)
1393 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1394 #endif	/* __i386 || __amd64 */
1395 	set_curthread(self);		/* redundant on i386 */
1396 	/*
1397 	 * Now curthread is established and it is safe to call any
1398 	 * function in libc except one that uses thread-local storage.
1399 	 */
1400 	self->ul_errnop = &errno;
1401 	if (oldself != NULL) {
1402 		/* tls_size was zero when oldself was allocated */
1403 		lfree(oldself, sizeof (ulwp_t));
1404 	}
1405 	mutex_setup();
1406 	atfork_init();
1407 	signal_init();
1408 
1409 	/*
1410 	 * If the stack is unlimited, we set the size to zero to disable
1411 	 * stack checking.
1412 	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1413 	 */
1414 	if (self->ul_stksiz == RLIM_INFINITY) {
1415 		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1416 		self->ul_ustack.ss_size = 0;
1417 	} else {
1418 		self->ul_ustack.ss_sp = self->ul_stk;
1419 		self->ul_ustack.ss_size = self->ul_stksiz;
1420 	}
1421 	self->ul_ustack.ss_flags = 0;
1422 	(void) setustack(&self->ul_ustack);
1423 
1424 	/*
1425 	 * Get the variables that affect thread behavior from the environment.
1426 	 */
1427 	set_thread_vars();
1428 	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1429 	udp->thread_stack_cache = thread_stack_cache;
1430 
1431 	/*
1432 	 * Make per-thread copies of global variables, for speed.
1433 	 */
1434 	self->ul_queue_fifo = (char)thread_queue_fifo;
1435 	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1436 	self->ul_error_detection = (char)thread_error_detection;
1437 	self->ul_async_safe = (char)thread_async_safe;
1438 	self->ul_door_noreserve = (char)thread_door_noreserve;
1439 	self->ul_misaligned = (char)thread_locks_misaligned;
1440 	self->ul_max_spinners = (uint8_t)thread_max_spinners;
1441 	self->ul_adaptive_spin = thread_adaptive_spin;
1442 	self->ul_queue_spin = thread_queue_spin;
1443 
1444 #if defined(__sparc) && !defined(_LP64)
1445 	if (self->ul_misaligned) {
1446 		/*
1447 		 * Tell the kernel to fix up ldx/stx instructions that
1448 		 * refer to non-8-byte aligned data instead of giving
1449 		 * the process an alignment trap and generating SIGBUS.
1450 		 *
1451 		 * Programs compiled for 32-bit sparc with the Studio SS12
1452 		 * compiler get this done for them automatically (in _init()).
1453 		 * We do it here for the benefit of programs compiled with
1454 		 * other compilers, like gcc.
1455 		 *
1456 		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
1457 		 * environment variable horrible hack to work.
1458 		 */
1459 		extern void _do_fix_align(void);
1460 		_do_fix_align();
1461 	}
1462 #endif
1463 
1464 	/*
1465 	 * When we have initialized the primary link map, inform
1466 	 * the dynamic linker about our interface functions.
1467 	 * Set up our pointer to the program name.
1468 	 */
1469 	if (self->ul_primarymap)
1470 		_ld_libc((void *)rtld_funcs);
1471 	init_progname();
1472 
1473 	/*
1474 	 * Defer signals until TLS constructors have been called.
1475 	 */
1476 	sigoff(self);
1477 	tls_setup();
1478 	sigon(self);
1479 	if (setmask)
1480 		(void) restore_signals(self);
1481 
1482 	/*
1483 	 * Make private copies of __xpg4 and __xpg6 so libc can test
1484 	 * them after this point without invoking the dynamic linker.
1485 	 */
1486 	libc__xpg4 = __xpg4;
1487 	libc__xpg6 = __xpg6;
1488 
1489 	/* PROBE_SUPPORT begin */
1490 	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1491 		__tnf_probe_notify();
1492 	/* PROBE_SUPPORT end */
1493 
1494 	init_sigev_thread();
1495 	init_aio();
1496 
1497 	/*
1498 	 * We need to reset __threaded dynamically at runtime, so that
1499 	 * __threaded can be bound to __threaded outside libc which may not
1500 	 * have initial value of 1 (without a copy relocation in a.out).
1501 	 */
1502 	__threaded = 1;
1503 }
1504 
1505 #pragma fini(libc_fini)
1506 void
1507 libc_fini()
1508 {
1509 	/*
1510 	 * If we are doing fini processing for the instance of libc
1511 	 * on the first alternate link map (this happens only when
1512 	 * the dynamic linker rejects a bad audit library), then clear
1513 	 * __curthread().  We abandon whatever memory was allocated by
1514 	 * lmalloc() while running on this alternate link-map but we
1515 	 * don't care (and can't find the memory in any case); we just
1516 	 * want to protect the application from this bad audit library.
1517 	 * No fini processing is done by libc in the normal case.
1518 	 */
1519 
1520 	uberdata_t *udp = curthread->ul_uberdata;
1521 
1522 	if (udp->primary_map == 0 && udp == &__uberdata)
1523 		set_curthread(NULL);
1524 }
1525 
1526 /*
1527  * finish_init is called when we are about to become multi-threaded,
1528  * that is, on the first call to thr_create().
1529  */
1530 void
1531 finish_init()
1532 {
1533 	ulwp_t *self = curthread;
1534 	uberdata_t *udp = self->ul_uberdata;
1535 	thr_hash_table_t *htp;
1536 	void *data;
1537 	int i;
1538 
1539 	/*
1540 	 * No locks needed here; we are single-threaded on the first call.
1541 	 * We can be called only after the primary link map has been set up.
1542 	 */
1543 	ASSERT(self->ul_primarymap);
1544 	ASSERT(self == udp->ulwp_one);
1545 	ASSERT(!udp->uberflags.uf_mt);
1546 	ASSERT(udp->hash_size == 1);
1547 
1548 	/*
1549 	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1550 	 */
1551 	update_sched(self);
1552 
1553 	/*
1554 	 * Allocate the queue_head array if not already allocated.
1555 	 */
1556 	if (udp->queue_head == NULL)
1557 		queue_alloc();
1558 
1559 	/*
1560 	 * Now allocate the thread hash table.
1561 	 */
1562 	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1563 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1564 	    == MAP_FAILED)
1565 		thr_panic("cannot allocate thread hash table");
1566 
1567 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1568 	udp->hash_size = HASHTBLSZ;
1569 	udp->hash_mask = HASHTBLSZ - 1;
1570 
1571 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1572 		htp->hash_lock.mutex_flag = LOCK_INITED;
1573 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1574 		htp->hash_cond.cond_magic = COND_MAGIC;
1575 	}
1576 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1577 
1578 	/*
1579 	 * Set up the SIGCANCEL handler for threads cancellation.
1580 	 */
1581 	setup_cancelsig(SIGCANCEL);
1582 
1583 	/*
1584 	 * Arrange to do special things on exit --
1585 	 * - collect queue statistics from all remaining active threads.
1586 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1587 	 * - grab assert_lock to ensure that assertion failures
1588 	 *   and a core dump take precedence over _exit().
1589 	 * (Functions are called in the reverse order of their registration.)
1590 	 */
1591 	(void) _atexit(grab_assert_lock);
1592 #if defined(THREAD_DEBUG)
1593 	(void) _atexit(dump_queue_statistics);
1594 	(void) _atexit(collect_queue_statistics);
1595 #endif
1596 }
1597 
1598 /*
1599  * Used only by postfork1_child(), below.
1600  */
1601 static void
1602 mark_dead_and_buried(ulwp_t *ulwp)
1603 {
1604 	ulwp->ul_dead = 1;
1605 	ulwp->ul_lwpid = (lwpid_t)(-1);
1606 	ulwp->ul_hash = NULL;
1607 	ulwp->ul_ix = -1;
1608 	ulwp->ul_schedctl = NULL;
1609 	ulwp->ul_schedctl_called = NULL;
1610 }
1611 
1612 /*
1613  * This is called from fork1() in the child.
1614  * Reset our data structures to reflect one lwp.
1615  */
1616 void
1617 postfork1_child()
1618 {
1619 	ulwp_t *self = curthread;
1620 	uberdata_t *udp = self->ul_uberdata;
1621 	queue_head_t *qp;
1622 	ulwp_t *next;
1623 	ulwp_t *ulwp;
1624 	int i;
1625 
1626 	/* daemon threads shouldn't call fork1(), but oh well... */
1627 	self->ul_usropts &= ~THR_DAEMON;
1628 	udp->nthreads = 1;
1629 	udp->ndaemons = 0;
1630 	udp->uberflags.uf_mt = 0;
1631 	__libc_threaded = 0;
1632 	for (i = 0; i < udp->hash_size; i++)
1633 		udp->thr_hash_table[i].hash_bucket = NULL;
1634 	self->ul_lwpid = _lwp_self();
1635 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1636 
1637 	/*
1638 	 * Some thread in the parent might have been suspended
1639 	 * while holding udp->callout_lock or udp->ld_lock.
1640 	 * Reinitialize the child's copies.
1641 	 */
1642 	(void) mutex_init(&udp->callout_lock,
1643 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1644 	(void) mutex_init(&udp->ld_lock,
1645 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1646 
1647 	/* no one in the child is on a sleep queue; reinitialize */
1648 	if ((qp = udp->queue_head) != NULL) {
1649 		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
1650 		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
1651 			qp->qh_type = (i < QHASHSIZE)? MX : CV;
1652 			qp->qh_lock.mutex_flag = LOCK_INITED;
1653 			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
1654 			qp->qh_hlist = &qp->qh_def_root;
1655 #if defined(THREAD_DEBUG)
1656 			qp->qh_hlen = 1;
1657 			qp->qh_hmax = 1;
1658 #endif
1659 		}
1660 	}
1661 
1662 	/*
1663 	 * Do post-fork1 processing for subsystems that need it.
1664 	 * We need to do this before unmapping all of the abandoned
1665 	 * threads' stacks, below(), because the post-fork1 actions
1666 	 * might require access to those stacks.
1667 	 */
1668 	postfork1_child_sigev_aio();
1669 	postfork1_child_sigev_mq();
1670 	postfork1_child_sigev_timer();
1671 	postfork1_child_aio();
1672 	/*
1673 	 * The above subsystems use thread pools, so this action
1674 	 * must be performed after those actions.
1675 	 */
1676 	postfork1_child_tpool();
1677 
1678 	/*
1679 	 * All lwps except ourself are gone.  Mark them so.
1680 	 * First mark all of the lwps that have already been freed.
1681 	 * Then mark and free all of the active lwps except ourself.
1682 	 * Since we are single-threaded, no locks are required here.
1683 	 */
1684 	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1685 		mark_dead_and_buried(ulwp);
1686 	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1687 		mark_dead_and_buried(ulwp);
1688 	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1689 		next = ulwp->ul_forw;
1690 		ulwp->ul_forw = ulwp->ul_back = NULL;
1691 		mark_dead_and_buried(ulwp);
1692 		tsd_free(ulwp);
1693 		tls_free(ulwp);
1694 		rwl_free(ulwp);
1695 		heldlock_free(ulwp);
1696 		ulwp_free(ulwp);
1697 	}
1698 	self->ul_forw = self->ul_back = udp->all_lwps = self;
1699 	if (self != udp->ulwp_one)
1700 		mark_dead_and_buried(udp->ulwp_one);
1701 	if ((ulwp = udp->all_zombies) != NULL) {
1702 		ASSERT(udp->nzombies != 0);
1703 		do {
1704 			next = ulwp->ul_forw;
1705 			ulwp->ul_forw = ulwp->ul_back = NULL;
1706 			mark_dead_and_buried(ulwp);
1707 			udp->nzombies--;
1708 			if (ulwp->ul_replace) {
1709 				ulwp->ul_next = NULL;
1710 				if (udp->ulwp_replace_free == NULL) {
1711 					udp->ulwp_replace_free =
1712 					    udp->ulwp_replace_last = ulwp;
1713 				} else {
1714 					udp->ulwp_replace_last->ul_next = ulwp;
1715 					udp->ulwp_replace_last = ulwp;
1716 				}
1717 			}
1718 		} while ((ulwp = next) != udp->all_zombies);
1719 		ASSERT(udp->nzombies == 0);
1720 		udp->all_zombies = NULL;
1721 		udp->nzombies = 0;
1722 	}
1723 	trim_stack_cache(0);
1724 }
1725 
1726 lwpid_t
1727 lwp_self(void)
1728 {
1729 	return (curthread->ul_lwpid);
1730 }
1731 
1732 #pragma weak _ti_thr_self = thr_self
1733 #pragma weak pthread_self = thr_self
1734 thread_t
1735 thr_self()
1736 {
1737 	return (curthread->ul_lwpid);
1738 }
1739 
1740 int
1741 thr_main()
1742 {
1743 	ulwp_t *self = __curthread();
1744 
1745 	return ((self == NULL)? -1 : self->ul_main);
1746 }
1747 
1748 int
1749 _thrp_cancelled(void)
1750 {
1751 	return (curthread->ul_rval == PTHREAD_CANCELED);
1752 }
1753 
1754 int
1755 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1756 {
1757 	stk->ss_sp = (void *)ulwp->ul_stktop;
1758 	stk->ss_size = ulwp->ul_stksiz;
1759 	stk->ss_flags = 0;
1760 	return (0);
1761 }
1762 
1763 #pragma weak _thr_stksegment = thr_stksegment
1764 int
1765 thr_stksegment(stack_t *stk)
1766 {
1767 	return (_thrp_stksegment(curthread, stk));
1768 }
1769 
1770 void
1771 force_continue(ulwp_t *ulwp)
1772 {
1773 #if defined(THREAD_DEBUG)
1774 	ulwp_t *self = curthread;
1775 	uberdata_t *udp = self->ul_uberdata;
1776 #endif
1777 	int error;
1778 	timespec_t ts;
1779 
1780 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1781 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1782 
1783 	for (;;) {
1784 		error = _lwp_continue(ulwp->ul_lwpid);
1785 		if (error != 0 && error != EINTR)
1786 			break;
1787 		error = 0;
1788 		if (ulwp->ul_stopping) {	/* he is stopping himself */
1789 			ts.tv_sec = 0;		/* give him a chance to run */
1790 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1791 			(void) __nanosleep(&ts, NULL);
1792 		}
1793 		if (!ulwp->ul_stopping)		/* he is running now */
1794 			break;			/* so we are done */
1795 		/*
1796 		 * He is marked as being in the process of stopping
1797 		 * himself.  Loop around and continue him again.
1798 		 * He may not have been stopped the first time.
1799 		 */
1800 	}
1801 }
1802 
/*
 * Suspend an lwp with lwp_suspend(), then move it to a safe point,
 * that is, to a point where ul_critical and ul_rtld are both zero.
 * On return, the ulwp_lock() is dropped as with ulwp_unlock().
 * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
 * If we have to drop link_lock, we store 1 through link_dropped;
 * otherwise we store 0 through it.
 * If the lwp exits before it can be suspended, we return ESRCH;
 * otherwise we return 0.
 *
 * As asserted below: 'whystopped' is exactly one of TSTP_REGULAR,
 * TSTP_MUTATOR or TSTP_FORK; the target is not the caller and is not
 * already stopped; and the caller holds both fork_lock and the
 * target's ulwp lock.
 */
int
safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	cond_t *cvp = ulwp_condvar(ulwp, udp);
	mutex_t *mp = ulwp_mutex(ulwp, udp);
	/*
	 * Remember the target's id and hash index now; they are used
	 * to find the target again after waiting on the condvar.
	 */
	thread_t tid = ulwp->ul_lwpid;
	int ix = ulwp->ul_ix;
	int error = 0;

	ASSERT(whystopped == TSTP_REGULAR ||
	    whystopped == TSTP_MUTATOR ||
	    whystopped == TSTP_FORK);
	ASSERT(ulwp != self);
	ASSERT(!ulwp->ul_stop);
	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(mp, self));

	if (link_dropped != NULL)
		*link_dropped = 0;

	/*
	 * We must grab the target's spin lock before suspending it.
	 * See the comments below and in _thrp_suspend() for why.
	 */
	spin_lock_set(&ulwp->ul_spinlock);
	(void) ___lwp_suspend(tid);
	spin_lock_clear(&ulwp->ul_spinlock);

top:
	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
	    ulwp->ul_stopping) {
		/* thread is already safe */
		ulwp->ul_stop |= whystopped;
	} else {
		/*
		 * Setting ul_pleasestop causes the target thread to stop
		 * itself in _thrp_suspend(), below, after we drop its lock.
		 * We must continue the critical thread before dropping
		 * link_lock because the critical thread may be holding
		 * the queue lock for link_lock.  This is delicate.
		 */
		ulwp->ul_pleasestop |= whystopped;
		force_continue(ulwp);
		if (link_dropped != NULL) {
			*link_dropped = 1;
			lmutex_unlock(&udp->link_lock);
			/* be sure to drop link_lock only once */
			link_dropped = NULL;
		}

		/*
		 * The thread may disappear by calling thr_exit() so we
		 * cannot rely on the ulwp pointer after dropping the lock.
		 * Instead, we search the hash table to find it again.
		 * When we return, we may find that the thread has been
		 * continued by some other thread.  The suspend/continue
		 * interfaces are prone to such race conditions by design.
		 */
		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
		    (ulwp->ul_pleasestop & whystopped)) {
			(void) __cond_wait(cvp, mp);
			/* look the target up again by its saved id */
			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
			    ulwp != NULL; ulwp = ulwp->ul_hash) {
				if (ulwp->ul_lwpid == tid)
					break;
			}
		}

		if (ulwp == NULL || ulwp->ul_dead)
			error = ESRCH;
		else {
			/*
			 * Do another lwp_suspend() to make sure we don't
			 * return until the target thread is fully stopped
			 * in the kernel.  Don't apply lwp_suspend() until
			 * we know that the target is not holding any
			 * queue locks, that is, that it has completed
			 * ulwp_unlock(self) and has, or at least is
			 * about to, call lwp_suspend() on itself.  We do
			 * this by grabbing the target's spin lock.
			 */
			ASSERT(ulwp->ul_lwpid == tid);
			spin_lock_set(&ulwp->ul_spinlock);
			(void) ___lwp_suspend(tid);
			spin_lock_clear(&ulwp->ul_spinlock);
			/*
			 * If some other thread did a thr_continue()
			 * on the target thread we have to start over.
			 */
			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
				goto top;
		}
	}

	/* wake any waiters on the target's condvar, then drop its lock */
	(void) cond_broadcast(cvp);
	lmutex_unlock(mp);
	return (error);
}
1911 
/*
 * Suspend the thread 'tid' for the reason(s) given in 'whystopped',
 * a non-empty combination of TSTP_REGULAR, TSTP_MUTATOR and TSTP_FORK
 * (asserted below).
 *
 * Returns 0 on success, ESRCH if find_lwp() cannot find the thread,
 * EINVAL if whystopped is TSTP_MUTATOR and the target is not marked
 * as a mutator, or whatever safe_suspend() returns when suspending
 * another thread.
 *
 * When the target is the calling thread, the call does not return
 * until some other thread has continued us.
 */
int
_thrp_suspend(thread_t tid, uchar_t whystopped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int error = 0;

	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);

	/*
	 * We can't suspend anyone except ourself while
	 * some other thread is performing a fork.
	 * This also allows only one suspension at a time.
	 */
	if (tid != self->ul_lwpid)
		fork_lock_enter();

	if ((ulwp = find_lwp(tid)) == NULL)
		error = ESRCH;
	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
		ulwp_unlock(ulwp, udp);
		error = EINVAL;
	} else if (ulwp->ul_stop) {	/* already stopped */
		/* just record the additional reason and wake any waiters */
		ulwp->ul_stop |= whystopped;
		ulwp_broadcast(ulwp);
		ulwp_unlock(ulwp, udp);
	} else if (ulwp != self) {
		/*
		 * After suspending the other thread, move it out of a
		 * critical section and deal with the schedctl mappings.
		 * safe_suspend() suspends the other thread, calls
		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
		 */
		error = safe_suspend(ulwp, whystopped, NULL);
	} else {
		int schedctl_after_fork = 0;

		/*
		 * We are suspending ourself.  We must not take a signal
		 * until we return from lwp_suspend() and clear ul_stopping.
		 * This is to guard against siglongjmp().
		 */
		enter_critical(self);
		self->ul_sp = stkptr();
		_flush_windows();	/* sparc */
		self->ul_pleasestop = 0;
		self->ul_stop |= whystopped;
		/*
		 * Grab our spin lock before dropping ulwp_mutex(self).
		 * This prevents the suspending thread from applying
		 * lwp_suspend() to us before we emerge from
		 * lmutex_unlock(mp) and have dropped mp's queue lock.
		 */
		spin_lock_set(&self->ul_spinlock);
		self->ul_stopping = 1;
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		/*
		 * From this point until we return from lwp_suspend(),
		 * we must not call any function that might invoke the
		 * dynamic linker, that is, we can only call functions
		 * private to the library.
		 *
		 * Also, this is a nasty race condition for a process
		 * that is undergoing a forkall() operation:
		 * Once we clear our spinlock (below), we are vulnerable
		 * to being suspended by the forkall() thread before
		 * we manage to suspend ourself in ___lwp_suspend().
		 * See safe_suspend() and force_continue().
		 *
		 * To avoid a SIGSEGV due to the disappearance
		 * of the schedctl mappings in the child process,
		 * which can happen in spin_lock_clear() if we
		 * are suspended while we are in the middle of
		 * its call to preempt(), we preemptively clear
		 * our own schedctl pointer before dropping our
		 * spinlock.  We reinstate it, in both the parent
		 * and (if this really is a forkall()) the child.
		 */
		if (whystopped & TSTP_FORK) {
			schedctl_after_fork = 1;
			self->ul_schedctl = NULL;
			self->ul_schedctl_called = &udp->uberflags;
		}
		spin_lock_clear(&self->ul_spinlock);
		(void) ___lwp_suspend(tid);
		/*
		 * Somebody else continued us.
		 * We can't grab ulwp_lock(self)
		 * until after clearing ul_stopping.
		 * force_continue() relies on this.
		 */
		self->ul_stopping = 0;
		self->ul_sp = 0;
		if (schedctl_after_fork) {
			/* reinstate the schedctl state cleared above */
			self->ul_schedctl_called = NULL;
			self->ul_schedctl = NULL;
			(void) setup_schedctl();
		}
		ulwp_lock(self, udp);
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		exit_critical(self);
	}

	/* pairs with the fork_lock_enter() above */
	if (tid != self->ul_lwpid)
		fork_lock_exit();

	return (error);
}
2024 
2025 /*
2026  * Suspend all lwps other than ourself in preparation for fork.
2027  */
2028 void
2029 suspend_fork()
2030 {
2031 	ulwp_t *self = curthread;
2032 	uberdata_t *udp = self->ul_uberdata;
2033 	ulwp_t *ulwp;
2034 	int link_dropped;
2035 
2036 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2037 top:
2038 	lmutex_lock(&udp->link_lock);
2039 
2040 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2041 		ulwp_lock(ulwp, udp);
2042 		if (ulwp->ul_stop) {	/* already stopped */
2043 			ulwp->ul_stop |= TSTP_FORK;
2044 			ulwp_broadcast(ulwp);
2045 			ulwp_unlock(ulwp, udp);
2046 		} else {
2047 			/*
2048 			 * Move the stopped lwp out of a critical section.
2049 			 */
2050 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2051 			    link_dropped)
2052 				goto top;
2053 		}
2054 	}
2055 
2056 	lmutex_unlock(&udp->link_lock);
2057 }
2058 
2059 void
2060 continue_fork(int child)
2061 {
2062 	ulwp_t *self = curthread;
2063 	uberdata_t *udp = self->ul_uberdata;
2064 	ulwp_t *ulwp;
2065 
2066 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2067 
2068 	/*
2069 	 * Clear the schedctl pointers in the child of forkall().
2070 	 */
2071 	if (child) {
2072 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2073 			ulwp->ul_schedctl_called =
2074 			    ulwp->ul_dead? &udp->uberflags : NULL;
2075 			ulwp->ul_schedctl = NULL;
2076 		}
2077 	}
2078 
2079 	/*
2080 	 * Set all lwps that were stopped for fork() running again.
2081 	 */
2082 	lmutex_lock(&udp->link_lock);
2083 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2084 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2085 		lmutex_lock(mp);
2086 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2087 		ulwp->ul_stop &= ~TSTP_FORK;
2088 		ulwp_broadcast(ulwp);
2089 		if (!ulwp->ul_stop)
2090 			force_continue(ulwp);
2091 		lmutex_unlock(mp);
2092 	}
2093 	lmutex_unlock(&udp->link_lock);
2094 }
2095 
2096 int
2097 _thrp_continue(thread_t tid, uchar_t whystopped)
2098 {
2099 	uberdata_t *udp = curthread->ul_uberdata;
2100 	ulwp_t *ulwp;
2101 	mutex_t *mp;
2102 	int error = 0;
2103 
2104 	ASSERT(whystopped == TSTP_REGULAR ||
2105 	    whystopped == TSTP_MUTATOR);
2106 
2107 	/*
2108 	 * We single-thread the entire thread suspend/continue mechanism.
2109 	 */
2110 	fork_lock_enter();
2111 
2112 	if ((ulwp = find_lwp(tid)) == NULL) {
2113 		fork_lock_exit();
2114 		return (ESRCH);
2115 	}
2116 
2117 	mp = ulwp_mutex(ulwp, udp);
2118 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2119 		error = EINVAL;
2120 	} else if (ulwp->ul_stop & whystopped) {
2121 		ulwp->ul_stop &= ~whystopped;
2122 		ulwp_broadcast(ulwp);
2123 		if (!ulwp->ul_stop) {
2124 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2125 				ulwp->ul_sp = 0;
2126 				ulwp->ul_created = 0;
2127 			}
2128 			force_continue(ulwp);
2129 		}
2130 	}
2131 	lmutex_unlock(mp);
2132 
2133 	fork_lock_exit();
2134 	return (error);
2135 }
2136 
2137 int
2138 thr_suspend(thread_t tid)
2139 {
2140 	return (_thrp_suspend(tid, TSTP_REGULAR));
2141 }
2142 
2143 int
2144 thr_continue(thread_t tid)
2145 {
2146 	return (_thrp_continue(tid, TSTP_REGULAR));
2147 }
2148 
/*
 * Thin wrapper that calls yield().  Takes no arguments and returns
 * nothing.
 */
void
thr_yield(void)
{
	yield();
}
2154 
2155 #pragma weak pthread_kill = thr_kill
2156 #pragma weak _thr_kill = thr_kill
2157 int
2158 thr_kill(thread_t tid, int sig)
2159 {
2160 	if (sig == SIGCANCEL)
2161 		return (EINVAL);
2162 	return (_lwp_kill(tid, sig));
2163 }
2164 
2165 /*
2166  * Exit a critical section, take deferred actions if necessary.
2167  * Called from exit_critical() and from sigon().
2168  */
2169 void
2170 do_exit_critical()
2171 {
2172 	ulwp_t *self = curthread;
2173 	int sig;
2174 
2175 	ASSERT(self->ul_critical == 0);
2176 
2177 	/*
2178 	 * Don't suspend ourself or take a deferred signal while dying
2179 	 * or while executing inside the dynamic linker (ld.so.1).
2180 	 */
2181 	if (self->ul_dead || self->ul_rtld)
2182 		return;
2183 
2184 	while (self->ul_pleasestop ||
2185 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2186 		/*
2187 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2188 		 * by keeping self->ul_critical == 1 here.
2189 		 */
2190 		self->ul_critical++;
2191 		while (self->ul_pleasestop) {
2192 			/*
2193 			 * Guard against suspending ourself while on a sleep
2194 			 * queue.  See the comments in call_user_handler().
2195 			 */
2196 			unsleep_self();
2197 			set_parking_flag(self, 0);
2198 			(void) _thrp_suspend(self->ul_lwpid,
2199 			    self->ul_pleasestop);
2200 		}
2201 		self->ul_critical--;
2202 
2203 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2204 			/*
2205 			 * Clear ul_cursig before proceeding.
2206 			 * This protects us from the dynamic linker's
2207 			 * calls to bind_guard()/bind_clear() in the
2208 			 * event that it is invoked to resolve a symbol
2209 			 * like take_deferred_signal() below.
2210 			 */
2211 			self->ul_cursig = 0;
2212 			take_deferred_signal(sig);
2213 			ASSERT(self->ul_cursig == 0);
2214 		}
2215 	}
2216 	ASSERT(self->ul_critical == 0);
2217 }
2218 
2219 /*
2220  * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
2221  * (ld.so.1) when it has do do something, like resolve a symbol to be called
2222  * by the application or one of its libraries.  _ti_bind_guard() is called
2223  * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2224  * application.  The dynamic linker gets special dispensation from libc to
2225  * run in a critical region (all signals deferred and no thread suspension
2226  * or forking allowed), and to be immune from cancellation for the duration.
2227  */
2228 int
2229 _ti_bind_guard(int flags)
2230 {
2231 	ulwp_t *self = curthread;
2232 	uberdata_t *udp = self->ul_uberdata;
2233 	int bindflag = (flags & THR_FLG_RTLD);
2234 
2235 	if ((self->ul_bindflags & bindflag) == bindflag)
2236 		return (0);
2237 	self->ul_bindflags |= bindflag;
2238 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2239 		sigoff(self);	/* see no signals while holding ld_lock */
2240 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2241 		(void) mutex_lock(&udp->ld_lock);
2242 	}
2243 	enter_critical(self);
2244 	self->ul_save_state = self->ul_cancel_disabled;
2245 	self->ul_cancel_disabled = 1;
2246 	set_cancel_pending_flag(self, 0);
2247 	return (1);
2248 }
2249 
2250 int
2251 _ti_bind_clear(int flags)
2252 {
2253 	ulwp_t *self = curthread;
2254 	uberdata_t *udp = self->ul_uberdata;
2255 	int bindflag = (flags & THR_FLG_RTLD);
2256 
2257 	if ((self->ul_bindflags & bindflag) == 0)
2258 		return (self->ul_bindflags);
2259 	self->ul_bindflags &= ~bindflag;
2260 	self->ul_cancel_disabled = self->ul_save_state;
2261 	set_cancel_pending_flag(self, 0);
2262 	exit_critical(self);
2263 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2264 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2265 			(void) mutex_unlock(&udp->ld_lock);
2266 			self->ul_rtld--;
2267 			sigon(self);	/* reenable signals */
2268 		}
2269 	}
2270 	return (self->ul_bindflags);
2271 }
2272 
2273 /*
2274  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2275  * a critical region in libc.  Return zero if not, else return non-zero.
2276  */
2277 int
2278 _ti_critical(void)
2279 {
2280 	ulwp_t *self = curthread;
2281 	int level = self->ul_critical;
2282 
2283 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2284 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2285 	return (level - 1);
2286 }
2287 
2288 /*
2289  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2290  * it does in the old libthread (see the comments in cond_wait_queue()).
2291  * Also, signals are deferred at thread startup until TLS constructors
2292  * have all been called, at which time _thrp_setup() calls sigon().
2293  *
2294  * _sigoff() and _sigon() are external consolidation-private interfaces to
2295  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2296  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2297  * (librtc.so) to defer signals during its critical sections (not to be
2298  * confused with libc critical sections [see exit_critical() above]).
2299  */
2300 void
2301 _sigoff(void)
2302 {
2303 	ulwp_t *self = curthread;
2304 
2305 	sigoff(self);
2306 }
2307 
2308 void
2309 _sigon(void)
2310 {
2311 	ulwp_t *self = curthread;
2312 
2313 	ASSERT(self->ul_sigdefer > 0);
2314 	sigon(self);
2315 }
2316 
2317 int
2318 thr_getconcurrency()
2319 {
2320 	return (thr_concurrency);
2321 }
2322 
2323 int
2324 pthread_getconcurrency()
2325 {
2326 	return (pthread_concurrency);
2327 }
2328 
2329 int
2330 thr_setconcurrency(int new_level)
2331 {
2332 	uberdata_t *udp = curthread->ul_uberdata;
2333 
2334 	if (new_level < 0)
2335 		return (EINVAL);
2336 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2337 		return (EAGAIN);
2338 	lmutex_lock(&udp->link_lock);
2339 	if (new_level > thr_concurrency)
2340 		thr_concurrency = new_level;
2341 	lmutex_unlock(&udp->link_lock);
2342 	return (0);
2343 }
2344 
2345 int
2346 pthread_setconcurrency(int new_level)
2347 {
2348 	if (new_level < 0)
2349 		return (EINVAL);
2350 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2351 		return (EAGAIN);
2352 	pthread_concurrency = new_level;
2353 	return (0);
2354 }
2355 
2356 size_t
2357 thr_min_stack(void)
2358 {
2359 	return (MINSTACK);
2360 }
2361 
2362 int
2363 __nthreads(void)
2364 {
2365 	return (curthread->ul_uberdata->nthreads);
2366 }
2367 
2368 /*
2369  * XXX
2370  * The remainder of this file implements the private interfaces to java for
2371  * garbage collection.  It is no longer used, at least by java 1.2.
2372  * It can all go away once all old JVMs have disappeared.
2373  */
2374 
2375 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2376 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2377 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2378 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2379 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2380 
2381 /*
2382  * Get the available register state for the target thread.
2383  * Return non-volatile registers: TRS_NONVOLATILE
2384  */
2385 #pragma weak _thr_getstate = thr_getstate
2386 int
2387 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2388 {
2389 	ulwp_t *self = curthread;
2390 	uberdata_t *udp = self->ul_uberdata;
2391 	ulwp_t **ulwpp;
2392 	ulwp_t *ulwp;
2393 	int error = 0;
2394 	int trs_flag = TRS_LWPID;
2395 
2396 	if (tid == 0 || self->ul_lwpid == tid) {
2397 		ulwp = self;
2398 		ulwp_lock(ulwp, udp);
2399 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2400 		ulwp = *ulwpp;
2401 	} else {
2402 		if (flag)
2403 			*flag = TRS_INVALID;
2404 		return (ESRCH);
2405 	}
2406 
2407 	if (ulwp->ul_dead) {
2408 		trs_flag = TRS_INVALID;
2409 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2410 		error = EINVAL;
2411 		trs_flag = TRS_INVALID;
2412 	} else if (ulwp->ul_stop) {
2413 		trs_flag = TRS_NONVOLATILE;
2414 		getgregs(ulwp, rs);
2415 	}
2416 
2417 	if (flag)
2418 		*flag = trs_flag;
2419 	if (lwp)
2420 		*lwp = tid;
2421 	if (ss != NULL)
2422 		(void) _thrp_stksegment(ulwp, ss);
2423 
2424 	ulwp_unlock(ulwp, udp);
2425 	return (error);
2426 }
2427 
2428 /*
2429  * Set the appropriate register state for the target thread.
2430  * This is not used by java.  It exists solely for the MSTC test suite.
2431  */
2432 #pragma weak _thr_setstate = thr_setstate
2433 int
2434 thr_setstate(thread_t tid, int flag, gregset_t rs)
2435 {
2436 	uberdata_t *udp = curthread->ul_uberdata;
2437 	ulwp_t *ulwp;
2438 	int error = 0;
2439 
2440 	if ((ulwp = find_lwp(tid)) == NULL)
2441 		return (ESRCH);
2442 
2443 	if (!ulwp->ul_stop && !suspendedallmutators)
2444 		error = EINVAL;
2445 	else if (rs != NULL) {
2446 		switch (flag) {
2447 		case TRS_NONVOLATILE:
2448 			/* do /proc stuff here? */
2449 			if (ulwp->ul_stop)
2450 				setgregs(ulwp, rs);
2451 			else
2452 				error = EINVAL;
2453 			break;
2454 		case TRS_LWPID:		/* do /proc stuff here? */
2455 		default:
2456 			error = EINVAL;
2457 			break;
2458 		}
2459 	}
2460 
2461 	ulwp_unlock(ulwp, udp);
2462 	return (error);
2463 }
2464 
2465 int
2466 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2467 {
2468 	extern ssize_t __pread(int, void *, size_t, off_t);
2469 	char buf[100];
2470 	int fd;
2471 
2472 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2473 	(void) strcpy(buf, "/proc/self/lwp/");
2474 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2475 	(void) strcat(buf, "/lwpstatus");
2476 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2477 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2478 			if (sp->pr_flags & PR_STOPPED) {
2479 				(void) __close(fd);
2480 				return (0);
2481 			}
2482 			yield();	/* give him a chance to stop */
2483 		}
2484 		(void) __close(fd);
2485 	}
2486 	return (-1);
2487 }
2488 
2489 int
2490 putlwpregs(thread_t tid, prgregset_t prp)
2491 {
2492 	extern ssize_t __writev(int, const struct iovec *, int);
2493 	char buf[100];
2494 	int fd;
2495 	long dstop_sreg[2];
2496 	long run_null[2];
2497 	iovec_t iov[3];
2498 
2499 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2500 	(void) strcpy(buf, "/proc/self/lwp/");
2501 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2502 	(void) strcat(buf, "/lwpctl");
2503 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2504 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2505 		dstop_sreg[1] = PCSREG;		/* set the registers */
2506 		iov[0].iov_base = (caddr_t)dstop_sreg;
2507 		iov[0].iov_len = sizeof (dstop_sreg);
2508 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2509 		iov[1].iov_len = sizeof (prgregset_t);
2510 		run_null[0] = PCRUN;		/* make it runnable again */
2511 		run_null[1] = 0;
2512 		iov[2].iov_base = (caddr_t)run_null;
2513 		iov[2].iov_len = sizeof (run_null);
2514 		if (__writev(fd, iov, 3) >= 0) {
2515 			(void) __close(fd);
2516 			return (0);
2517 		}
2518 		(void) __close(fd);
2519 	}
2520 	return (-1);
2521 }
2522 
2523 static ulong_t
2524 gettsp_slow(thread_t tid)
2525 {
2526 	char buf[100];
2527 	struct lwpstatus status;
2528 
2529 	if (getlwpstatus(tid, &status) != 0) {
2530 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2531 		(void) strcpy(buf, "__gettsp(");
2532 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2533 		(void) strcat(buf, "): can't read lwpstatus");
2534 		thr_panic(buf);
2535 	}
2536 	return (status.pr_reg[R_SP]);
2537 }
2538 
2539 ulong_t
2540 __gettsp(thread_t tid)
2541 {
2542 	uberdata_t *udp = curthread->ul_uberdata;
2543 	ulwp_t *ulwp;
2544 	ulong_t result;
2545 
2546 	if ((ulwp = find_lwp(tid)) == NULL)
2547 		return (0);
2548 
2549 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2550 		ulwp_unlock(ulwp, udp);
2551 		return (result);
2552 	}
2553 
2554 	result = gettsp_slow(tid);
2555 	ulwp_unlock(ulwp, udp);
2556 	return (result);
2557 }
2558 
2559 /*
2560  * This tells java stack walkers how to find the ucontext
2561  * structure passed to signal handlers.
2562  */
2563 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2564 void
2565 thr_sighndlrinfo(void (**func)(), int *funcsize)
2566 {
2567 	*func = &__sighndlr;
2568 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2569 }
2570 
2571 /*
2572  * Mark a thread a mutator or reset a mutator to being a default,
2573  * non-mutator thread.
2574  */
2575 #pragma weak _thr_setmutator = thr_setmutator
2576 int
2577 thr_setmutator(thread_t tid, int enabled)
2578 {
2579 	ulwp_t *self = curthread;
2580 	uberdata_t *udp = self->ul_uberdata;
2581 	ulwp_t *ulwp;
2582 	int error;
2583 	int cancel_state;
2584 
2585 	enabled = enabled? 1 : 0;
2586 top:
2587 	if (tid == 0) {
2588 		ulwp = self;
2589 		ulwp_lock(ulwp, udp);
2590 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2591 		return (ESRCH);
2592 	}
2593 
2594 	/*
2595 	 * The target thread should be the caller itself or a suspended thread.
2596 	 * This prevents the target from also changing its ul_mutator field.
2597 	 */
2598 	error = 0;
2599 	if (ulwp != self && !ulwp->ul_stop && enabled)
2600 		error = EINVAL;
2601 	else if (ulwp->ul_mutator != enabled) {
2602 		lmutex_lock(&mutatorslock);
2603 		if (mutatorsbarrier) {
2604 			ulwp_unlock(ulwp, udp);
2605 			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2606 			    &cancel_state);
2607 			while (mutatorsbarrier)
2608 				(void) cond_wait(&mutatorscv, &mutatorslock);
2609 			(void) pthread_setcancelstate(cancel_state, NULL);
2610 			lmutex_unlock(&mutatorslock);
2611 			goto top;
2612 		}
2613 		ulwp->ul_mutator = enabled;
2614 		lmutex_unlock(&mutatorslock);
2615 	}
2616 
2617 	ulwp_unlock(ulwp, udp);
2618 	return (error);
2619 }
2620 
2621 /*
2622  * Establish a barrier against new mutators.  Any non-mutator trying
2623  * to become a mutator is suspended until the barrier is removed.
2624  */
2625 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2626 void
2627 thr_mutators_barrier(int enabled)
2628 {
2629 	int oldvalue;
2630 	int cancel_state;
2631 
2632 	lmutex_lock(&mutatorslock);
2633 
2634 	/*
2635 	 * Wait if trying to set the barrier while it is already set.
2636 	 */
2637 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2638 	while (mutatorsbarrier && enabled)
2639 		(void) cond_wait(&mutatorscv, &mutatorslock);
2640 	(void) pthread_setcancelstate(cancel_state, NULL);
2641 
2642 	oldvalue = mutatorsbarrier;
2643 	mutatorsbarrier = enabled;
2644 	/*
2645 	 * Wakeup any blocked non-mutators when barrier is removed.
2646 	 */
2647 	if (oldvalue && !enabled)
2648 		(void) cond_broadcast(&mutatorscv);
2649 	lmutex_unlock(&mutatorslock);
2650 }
2651 
2652 /*
2653  * Suspend the set of all mutators except for the caller.  The list
2654  * of actively running threads is searched and only the mutators
2655  * in this list are suspended.  Actively running non-mutators remain
2656  * running.  Any other thread is suspended.
2657  */
2658 #pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2659 int
2660 thr_suspend_allmutators(void)
2661 {
2662 	ulwp_t *self = curthread;
2663 	uberdata_t *udp = self->ul_uberdata;
2664 	ulwp_t *ulwp;
2665 	int link_dropped;
2666 
2667 	/*
2668 	 * We single-thread the entire thread suspend/continue mechanism.
2669 	 */
2670 	fork_lock_enter();
2671 
2672 top:
2673 	lmutex_lock(&udp->link_lock);
2674 
2675 	if (suspendingallmutators || suspendedallmutators) {
2676 		lmutex_unlock(&udp->link_lock);
2677 		fork_lock_exit();
2678 		return (EINVAL);
2679 	}
2680 	suspendingallmutators = 1;
2681 
2682 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2683 		ulwp_lock(ulwp, udp);
2684 		if (!ulwp->ul_mutator) {
2685 			ulwp_unlock(ulwp, udp);
2686 		} else if (ulwp->ul_stop) {	/* already stopped */
2687 			ulwp->ul_stop |= TSTP_MUTATOR;
2688 			ulwp_broadcast(ulwp);
2689 			ulwp_unlock(ulwp, udp);
2690 		} else {
2691 			/*
2692 			 * Move the stopped lwp out of a critical section.
2693 			 */
2694 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2695 			    link_dropped) {
2696 				suspendingallmutators = 0;
2697 				goto top;
2698 			}
2699 		}
2700 	}
2701 
2702 	suspendedallmutators = 1;
2703 	suspendingallmutators = 0;
2704 	lmutex_unlock(&udp->link_lock);
2705 	fork_lock_exit();
2706 	return (0);
2707 }
2708 
2709 /*
2710  * Suspend the target mutator.  The caller is permitted to suspend
2711  * itself.  If a mutator barrier is enabled, the caller will suspend
2712  * itself as though it had been suspended by thr_suspend_allmutators().
2713  * When the barrier is removed, this thread will be resumed.  Any
2714  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2715  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2716  */
2717 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2718 int
2719 thr_suspend_mutator(thread_t tid)
2720 {
2721 	if (tid == 0)
2722 		tid = curthread->ul_lwpid;
2723 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2724 }
2725 
2726 /*
2727  * Resume the set of all suspended mutators.
2728  */
2729 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2730 int
2731 thr_continue_allmutators()
2732 {
2733 	ulwp_t *self = curthread;
2734 	uberdata_t *udp = self->ul_uberdata;
2735 	ulwp_t *ulwp;
2736 
2737 	/*
2738 	 * We single-thread the entire thread suspend/continue mechanism.
2739 	 */
2740 	fork_lock_enter();
2741 
2742 	lmutex_lock(&udp->link_lock);
2743 	if (!suspendedallmutators) {
2744 		lmutex_unlock(&udp->link_lock);
2745 		fork_lock_exit();
2746 		return (EINVAL);
2747 	}
2748 	suspendedallmutators = 0;
2749 
2750 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2751 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2752 		lmutex_lock(mp);
2753 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2754 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2755 			ulwp_broadcast(ulwp);
2756 			if (!ulwp->ul_stop)
2757 				force_continue(ulwp);
2758 		}
2759 		lmutex_unlock(mp);
2760 	}
2761 
2762 	lmutex_unlock(&udp->link_lock);
2763 	fork_lock_exit();
2764 	return (0);
2765 }
2766 
2767 /*
2768  * Resume a suspended mutator.
2769  */
2770 #pragma weak _thr_continue_mutator = thr_continue_mutator
2771 int
2772 thr_continue_mutator(thread_t tid)
2773 {
2774 	return (_thrp_continue(tid, TSTP_MUTATOR));
2775 }
2776 
2777 #pragma weak _thr_wait_mutator = thr_wait_mutator
2778 int
2779 thr_wait_mutator(thread_t tid, int dontwait)
2780 {
2781 	uberdata_t *udp = curthread->ul_uberdata;
2782 	ulwp_t *ulwp;
2783 	int cancel_state;
2784 	int error = 0;
2785 
2786 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2787 top:
2788 	if ((ulwp = find_lwp(tid)) == NULL) {
2789 		(void) pthread_setcancelstate(cancel_state, NULL);
2790 		return (ESRCH);
2791 	}
2792 
2793 	if (!ulwp->ul_mutator)
2794 		error = EINVAL;
2795 	else if (dontwait) {
2796 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2797 			error = EWOULDBLOCK;
2798 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2799 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2800 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2801 
2802 		(void) cond_wait(cvp, mp);
2803 		(void) lmutex_unlock(mp);
2804 		goto top;
2805 	}
2806 
2807 	ulwp_unlock(ulwp, udp);
2808 	(void) pthread_setcancelstate(cancel_state, NULL);
2809 	return (error);
2810 }
2811 
2812 /* PROBE_SUPPORT begin */
2813 
2814 void
2815 thr_probe_setup(void *data)
2816 {
2817 	curthread->ul_tpdp = data;
2818 }
2819 
2820 static void *
2821 _thread_probe_getfunc()
2822 {
2823 	return (curthread->ul_tpdp);
2824 }
2825 
2826 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2827 
2828 /* ARGSUSED */
2829 void
2830 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2831 {
2832 	/* never called */
2833 }
2834 
2835 /* ARGSUSED */
2836 void
2837 _resume_ret(ulwp_t *oldlwp)
2838 {
2839 	/* never called */
2840 }
2841 
2842 /* PROBE_SUPPORT end */
2843