xref: /titanic_44/usr/src/lib/libc/port/threads/thr.c (revision 8b1606b5d15e5e037182f32e0306c43206772082)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include "lint.h"
27 #include "thr_uberdata.h"
28 #include <pthread.h>
29 #include <procfs.h>
30 #include <sys/uio.h>
31 #include <ctype.h>
32 #include "libc.h"
33 
34 /*
35  * These symbols should not be exported from libc, but
36  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
37  * Also, some older versions of the Studio compiler/debugger
38  * components reference them.  These need to be fixed, too.
39  */
40 #pragma weak _thr_main = thr_main
41 #pragma weak _thr_create = thr_create
42 #pragma weak _thr_join = thr_join
43 #pragma weak _thr_self = thr_self
44 
45 #undef errno
46 extern int errno;
47 
48 /*
49  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
50  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
51  * system used it illegally (it is a consolidation private symbol).
52  * To accommodate this and possibly other abusers of the symbol,
53  * we make it always equal to 1 now that libthread has been folded
54  * into libc.  The new __libc_threaded symbol is used to indicate
55  * the new meaning, "more than one thread exists".
56  */
57 int __threaded = 1;		/* always equal to 1 */
58 int __libc_threaded = 0;	/* zero until first thr_create() */
59 
60 /*
61  * thr_concurrency and pthread_concurrency are not used by the library.
62  * They exist solely to hold and return the values set by calls to
63  * thr_setconcurrency() and pthread_setconcurrency().
64  * Because thr_concurrency is affected by the THR_NEW_LWP flag
65  * to thr_create(), thr_concurrency is protected by link_lock.
66  */
67 static	int	thr_concurrency = 1;
68 static	int	pthread_concurrency;
69 
#define	HASHTBLSZ	1024	/* must be a power of two */
/*
 * Map a thread id to a bucket index of (udp)->thr_hash_table by masking
 * it with (udp)->hash_mask.  Both macro arguments are parenthesized so
 * that an expression argument (e.g. "a | b") cannot be regrouped by
 * operator precedence inside the expansion.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
72 
73 /* initial allocation, just enough for one lwp */
74 #pragma align 64(init_hash_table)
75 thr_hash_table_t init_hash_table[1] = {
76 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
77 };
78 
79 extern const Lc_interface rtld_funcs[];
80 
/*
 * Static initial value of the library's uberdata_t.
 * The initializer is positional: each trailing comment names the member
 * that the value on that line initializes, so entries must not be
 * reordered independently of the structure declaration.
 * The thread hash table initially points at the one-entry init_hash_table
 * (hash_size 1, hash_mask 0); _thrp_create() calls finish_init() for as
 * long as hash_size is still 1.
 */
81 /*
82  * The weak version is known to libc_db and mdb.
83  */
84 #pragma weak _uberdata = __uberdata
85 uberdata_t __uberdata = {
86 	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
87 	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
88 	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
89 	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
90 	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
91 	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
92 	{ 0, },				/* tdb_hash_lock_stats */
93 	{ { 0 }, },			/* siguaction[NSIG] */
94 	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
95 	{ DEFAULTMUTEX, NULL, 0 },
96 	{ DEFAULTMUTEX, NULL, 0 },
97 	{ DEFAULTMUTEX, NULL, 0 },
98 	{ DEFAULTMUTEX, NULL, 0 },
99 	{ DEFAULTMUTEX, NULL, 0 },
100 	{ DEFAULTMUTEX, NULL, 0 },
101 	{ DEFAULTMUTEX, NULL, 0 },
102 	{ DEFAULTMUTEX, NULL, 0 },
103 	{ DEFAULTMUTEX, NULL, 0 }},
104 	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
105 	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
106 	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
107 	0,			/* primary_map */
108 	0,			/* bucket_init */
109 	0,			/* pad[0] */
110 	0,			/* pad[1] */
111 	{ 0 },			/* uberflags */
112 	NULL,			/* queue_head */
113 	init_hash_table,	/* thr_hash_table */
114 	1,			/* hash_size: size of the hash table */
115 	0,			/* hash_mask: hash_size - 1 */
116 	NULL,			/* ulwp_one */
117 	NULL,			/* all_lwps */
118 	NULL,			/* all_zombies */
119 	0,			/* nthreads */
120 	0,			/* nzombies */
121 	0,			/* ndaemons */
122 	0,			/* pid */
123 	sigacthandler,		/* sigacthandler */
124 	NULL,			/* lwp_stacks */
125 	NULL,			/* lwp_laststack */
126 	0,			/* nfreestack */
127 	10,			/* thread_stack_cache */
128 	NULL,			/* ulwp_freelist */
129 	NULL,			/* ulwp_lastfree */
130 	NULL,			/* ulwp_replace_free */
131 	NULL,			/* ulwp_replace_last */
132 	NULL,			/* atforklist */
133 	NULL,			/* robustlocks */
134 	NULL,			/* robustlist */
135 	NULL,			/* progname */
136 	NULL,			/* __tdb_bootstrap */
137 	{			/* tdb */
138 		NULL,		/* tdb_sync_addr_hash */
139 		0,		/* tdb_register_count */
140 		0,		/* tdb_hash_alloc_failed */
141 		NULL,		/* tdb_sync_addr_free */
142 		NULL,		/* tdb_sync_addr_last */
143 		0,		/* tdb_sync_alloc */
144 		{ 0, 0 },	/* tdb_ev_global_mask */
145 		tdb_events,	/* tdb_events array */
146 	},
147 };
148 
149 /*
150  * The weak version is known to libc_db and mdb.
151  */
152 #pragma weak _tdb_bootstrap = __tdb_bootstrap
153 uberdata_t **__tdb_bootstrap = NULL;
154 
155 int	thread_queue_fifo = 4;
156 int	thread_queue_dump = 0;
157 int	thread_cond_wait_defer = 0;
158 int	thread_error_detection = 0;
159 int	thread_async_safe = 0;
160 int	thread_stack_cache = 10;
161 int	thread_door_noreserve = 0;
162 int	thread_locks_misaligned = 0;
163 
164 static	ulwp_t	*ulwp_alloc(void);
165 static	void	ulwp_free(ulwp_t *);
166 
167 /*
168  * Insert the lwp into the hash table.
169  */
170 void
171 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
172 {
173 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
174 	udp->thr_hash_table[ix].hash_bucket = ulwp;
175 	ulwp->ul_ix = ix;
176 }
177 
178 void
179 hash_in(ulwp_t *ulwp, uberdata_t *udp)
180 {
181 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
182 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
183 
184 	lmutex_lock(mp);
185 	hash_in_unlocked(ulwp, ix, udp);
186 	lmutex_unlock(mp);
187 }
188 
189 /*
190  * Delete the lwp from the hash table.
191  */
192 void
193 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
194 {
195 	ulwp_t **ulwpp;
196 
197 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
198 	    ulwp != *ulwpp;
199 	    ulwpp = &(*ulwpp)->ul_hash)
200 		;
201 	*ulwpp = ulwp->ul_hash;
202 	ulwp->ul_hash = NULL;
203 	ulwp->ul_ix = -1;
204 }
205 
206 void
207 hash_out(ulwp_t *ulwp, uberdata_t *udp)
208 {
209 	int ix;
210 
211 	if ((ix = ulwp->ul_ix) >= 0) {
212 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
213 
214 		lmutex_lock(mp);
215 		hash_out_unlocked(ulwp, ix, udp);
216 		lmutex_unlock(mp);
217 	}
218 }
219 
220 /*
221  * Retain stack information for thread structures that are being recycled for
222  * new threads.  All other members of the thread structure should be zeroed.
223  */
224 static void
225 ulwp_clean(ulwp_t *ulwp)
226 {
227 	caddr_t stk = ulwp->ul_stk;
228 	size_t mapsiz = ulwp->ul_mapsiz;
229 	size_t guardsize = ulwp->ul_guardsize;
230 	uintptr_t stktop = ulwp->ul_stktop;
231 	size_t stksiz = ulwp->ul_stksiz;
232 
233 	(void) memset(ulwp, 0, sizeof (*ulwp));
234 
235 	ulwp->ul_stk = stk;
236 	ulwp->ul_mapsiz = mapsiz;
237 	ulwp->ul_guardsize = guardsize;
238 	ulwp->ul_stktop = stktop;
239 	ulwp->ul_stksiz = stksiz;
240 }
241 
/*
 * Protection bits passed to mmap() for library-allocated thread stacks.
 * Zero means "not yet determined"; find_stack() fills it in on first use.
 */
242 static int stackprot;
243 
244 /*
245  * Answer the question, "Is the lwp in question really dead?"
246  * We must inquire of the operating system to be really sure
247  * because the lwp may have called lwp_exit() but it has not
248  * yet completed the exit.
249  */
250 static int
251 dead_and_buried(ulwp_t *ulwp)
252 {
253 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
254 		return (1);
255 	if (ulwp->ul_dead && ulwp->ul_detached &&
256 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
257 		ulwp->ul_lwpid = (lwpid_t)(-1);
258 		return (1);
259 	}
260 	return (0);
261 }
262 
263 /*
264  * Attempt to keep the stack cache within the specified cache limit.
265  */
266 static void
267 trim_stack_cache(int cache_limit)
268 {
269 	ulwp_t *self = curthread;
270 	uberdata_t *udp = self->ul_uberdata;
271 	ulwp_t *prev = NULL;
272 	ulwp_t **ulwpp = &udp->lwp_stacks;
273 	ulwp_t *ulwp;
274 
275 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
276 
277 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
278 		if (dead_and_buried(ulwp)) {
279 			*ulwpp = ulwp->ul_next;
280 			if (ulwp == udp->lwp_laststack)
281 				udp->lwp_laststack = prev;
282 			hash_out(ulwp, udp);
283 			udp->nfreestack--;
284 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
285 			/*
286 			 * Now put the free ulwp on the ulwp freelist.
287 			 */
288 			ulwp->ul_mapsiz = 0;
289 			ulwp->ul_next = NULL;
290 			if (udp->ulwp_freelist == NULL)
291 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
292 			else {
293 				udp->ulwp_lastfree->ul_next = ulwp;
294 				udp->ulwp_lastfree = ulwp;
295 			}
296 		} else {
297 			prev = ulwp;
298 			ulwpp = &ulwp->ul_next;
299 		}
300 	}
301 }
302 
303 /*
304  * Find an unused stack of the requested size
305  * or create a new stack of the requested size.
306  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
307  * thr_exit() stores 1 in the ul_dead member.
308  * thr_join() stores -1 in the ul_lwpid member.
309  */
310 static ulwp_t *
311 find_stack(size_t stksize, size_t guardsize)
312 {
313 	static size_t pagesize = 0;
314 
315 	uberdata_t *udp = curthread->ul_uberdata;
316 	size_t mapsize;
317 	ulwp_t *prev;
318 	ulwp_t *ulwp;
319 	ulwp_t **ulwpp;
320 	void *stk;
321 
322 	/*
323 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
324 	 * unless overridden by the system's configuration.
325 	 */
326 	if (stackprot == 0) {	/* do this once */
327 		long lprot = _sysconf(_SC_STACK_PROT);
328 		if (lprot <= 0)
329 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
330 		stackprot = (int)lprot;
331 	}
332 	if (pagesize == 0)	/* do this once */
333 		pagesize = _sysconf(_SC_PAGESIZE);
334 
335 	/*
336 	 * One megabyte stacks by default, but subtract off
337 	 * two pages for the system-created red zones.
338 	 * Round up a non-zero stack size to a pagesize multiple.
339 	 */
340 	if (stksize == 0)
341 		stksize = DEFAULTSTACK - 2 * pagesize;
342 	else
343 		stksize = ((stksize + pagesize - 1) & -pagesize);
344 
345 	/*
346 	 * Round up the mapping size to a multiple of pagesize.
347 	 * Note: mmap() provides at least one page of red zone
348 	 * so we deduct that from the value of guardsize.
349 	 */
350 	if (guardsize != 0)
351 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
352 	mapsize = stksize + guardsize;
353 
354 	lmutex_lock(&udp->link_lock);
355 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
356 	    (ulwp = *ulwpp) != NULL;
357 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
358 		if (ulwp->ul_mapsiz == mapsize &&
359 		    ulwp->ul_guardsize == guardsize &&
360 		    dead_and_buried(ulwp)) {
361 			/*
362 			 * The previous lwp is gone; reuse the stack.
363 			 * Remove the ulwp from the stack list.
364 			 */
365 			*ulwpp = ulwp->ul_next;
366 			ulwp->ul_next = NULL;
367 			if (ulwp == udp->lwp_laststack)
368 				udp->lwp_laststack = prev;
369 			hash_out(ulwp, udp);
370 			udp->nfreestack--;
371 			lmutex_unlock(&udp->link_lock);
372 			ulwp_clean(ulwp);
373 			return (ulwp);
374 		}
375 	}
376 
377 	/*
378 	 * None of the cached stacks matched our mapping size.
379 	 * Reduce the stack cache to get rid of possibly
380 	 * very old stacks that will never be reused.
381 	 */
382 	if (udp->nfreestack > udp->thread_stack_cache)
383 		trim_stack_cache(udp->thread_stack_cache);
384 	else if (udp->nfreestack > 0)
385 		trim_stack_cache(udp->nfreestack - 1);
386 	lmutex_unlock(&udp->link_lock);
387 
388 	/*
389 	 * Create a new stack.
390 	 */
391 	if ((stk = mmap(NULL, mapsize, stackprot,
392 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
393 		/*
394 		 * We have allocated our stack.  Now allocate the ulwp.
395 		 */
396 		ulwp = ulwp_alloc();
397 		if (ulwp == NULL)
398 			(void) munmap(stk, mapsize);
399 		else {
400 			ulwp->ul_stk = stk;
401 			ulwp->ul_mapsiz = mapsize;
402 			ulwp->ul_guardsize = guardsize;
403 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
404 			ulwp->ul_stksiz = stksize;
405 			if (guardsize)	/* protect the extra red zone */
406 				(void) mprotect(stk, guardsize, PROT_NONE);
407 		}
408 	}
409 	return (ulwp);
410 }
411 
412 /*
413  * Get a ulwp_t structure from the free list or allocate a new one.
414  * Such ulwp_t's do not have a stack allocated by the library.
415  */
416 static ulwp_t *
417 ulwp_alloc(void)
418 {
419 	ulwp_t *self = curthread;
420 	uberdata_t *udp = self->ul_uberdata;
421 	size_t tls_size;
422 	ulwp_t *prev;
423 	ulwp_t *ulwp;
424 	ulwp_t **ulwpp;
425 	caddr_t data;
426 
427 	lmutex_lock(&udp->link_lock);
428 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
429 	    (ulwp = *ulwpp) != NULL;
430 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
431 		if (dead_and_buried(ulwp)) {
432 			*ulwpp = ulwp->ul_next;
433 			ulwp->ul_next = NULL;
434 			if (ulwp == udp->ulwp_lastfree)
435 				udp->ulwp_lastfree = prev;
436 			hash_out(ulwp, udp);
437 			lmutex_unlock(&udp->link_lock);
438 			ulwp_clean(ulwp);
439 			return (ulwp);
440 		}
441 	}
442 	lmutex_unlock(&udp->link_lock);
443 
444 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
445 	data = lmalloc(sizeof (*ulwp) + tls_size);
446 	if (data != NULL) {
447 		/* LINTED pointer cast may result in improper alignment */
448 		ulwp = (ulwp_t *)(data + tls_size);
449 	}
450 	return (ulwp);
451 }
452 
453 /*
454  * Free a ulwp structure.
455  * If there is an associated stack, put it on the stack list and
456  * munmap() previously freed stacks up to the residual cache limit.
457  * Else put it on the ulwp free list and never call lfree() on it.
458  */
459 static void
460 ulwp_free(ulwp_t *ulwp)
461 {
462 	uberdata_t *udp = curthread->ul_uberdata;
463 
464 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
465 	ulwp->ul_next = NULL;
466 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
467 		/*EMPTY*/;
468 	else if (ulwp->ul_mapsiz != 0) {
469 		if (udp->lwp_stacks == NULL)
470 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
471 		else {
472 			udp->lwp_laststack->ul_next = ulwp;
473 			udp->lwp_laststack = ulwp;
474 		}
475 		if (++udp->nfreestack > udp->thread_stack_cache)
476 			trim_stack_cache(udp->thread_stack_cache);
477 	} else {
478 		if (udp->ulwp_freelist == NULL)
479 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
480 		else {
481 			udp->ulwp_lastfree->ul_next = ulwp;
482 			udp->ulwp_lastfree = ulwp;
483 		}
484 	}
485 }
486 
487 /*
488  * Find a named lwp and return a pointer to its hash list location.
489  * On success, returns with the hash lock held.
490  */
491 ulwp_t **
492 find_lwpp(thread_t tid)
493 {
494 	uberdata_t *udp = curthread->ul_uberdata;
495 	int ix = TIDHASH(tid, udp);
496 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
497 	ulwp_t *ulwp;
498 	ulwp_t **ulwpp;
499 
500 	if (tid == 0)
501 		return (NULL);
502 
503 	lmutex_lock(mp);
504 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
505 	    (ulwp = *ulwpp) != NULL;
506 	    ulwpp = &ulwp->ul_hash) {
507 		if (ulwp->ul_lwpid == tid)
508 			return (ulwpp);
509 	}
510 	lmutex_unlock(mp);
511 	return (NULL);
512 }
513 
514 /*
515  * Wake up all lwps waiting on this lwp for some reason.
516  */
517 void
518 ulwp_broadcast(ulwp_t *ulwp)
519 {
520 	ulwp_t *self = curthread;
521 	uberdata_t *udp = self->ul_uberdata;
522 
523 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
524 	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
525 }
526 
527 /*
528  * Find a named lwp and return a pointer to it.
529  * Returns with the hash lock held.
530  */
531 ulwp_t *
532 find_lwp(thread_t tid)
533 {
534 	ulwp_t *self = curthread;
535 	uberdata_t *udp = self->ul_uberdata;
536 	ulwp_t *ulwp = NULL;
537 	ulwp_t **ulwpp;
538 
539 	if (self->ul_lwpid == tid) {
540 		ulwp = self;
541 		ulwp_lock(ulwp, udp);
542 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
543 		ulwp = *ulwpp;
544 	}
545 
546 	if (ulwp && ulwp->ul_dead) {
547 		ulwp_unlock(ulwp, udp);
548 		ulwp = NULL;
549 	}
550 
551 	return (ulwp);
552 }
553 
/*
 * Common thread creation routine; thr_create() calls it with a
 * guardsize of zero.
 *
 * stk/stksize	caller-supplied stack, or stk == NULL to have the library
 *		find or map one with find_stack(); if either is given,
 *		stksize must be at least MINSTACK
 * func/arg	start function and its argument, stored in the new ulwp
 * flags	THR_* creation flags; THR_DAEMON implies THR_DETACHED
 * new_thread	if not NULL, receives the id of the new thread
 * guardsize	passed through to find_stack() for library stacks
 *
 * Returns 0 on success, otherwise an error number: ENOTSUP (primary
 * link map not yet initialized, or child of vfork()), EINVAL (stack too
 * small), ENOMEM (no stack or ulwp), EFAULT (setup_context() failed on
 * a caller-supplied stack), or whatever setup_context() or
 * __lwp_create() returned.
 *
 * The lwp is always created suspended and is continued at the end
 * unless THR_SUSPENDED was requested.
 */
554 int
555 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
556 	long flags, thread_t *new_thread, size_t guardsize)
557 {
558 	ulwp_t *self = curthread;
559 	uberdata_t *udp = self->ul_uberdata;
560 	ucontext_t uc;
561 	uint_t lwp_flags;
562 	thread_t tid;
563 	int error;
564 	ulwp_t *ulwp;
565 
566 	/*
567 	 * Enforce the restriction of not creating any threads
568 	 * until the primary link map has been initialized.
569 	 * Also, disallow thread creation to a child of vfork().
570 	 */
571 	if (!self->ul_primarymap || self->ul_vfork)
572 		return (ENOTSUP);
573 
	/* hash_size is still at its static initial value of 1 */
574 	if (udp->hash_size == 1)
575 		finish_init();
576 
577 	if ((stk || stksize) && stksize < MINSTACK)
578 		return (EINVAL);
579 
580 	if (stk == NULL) {
581 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
582 			return (ENOMEM);
583 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
584 	} else {
585 		/* initialize the private stack */
586 		if ((ulwp = ulwp_alloc()) == NULL)
587 			return (ENOMEM);
588 		ulwp->ul_stk = stk;
589 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
590 		ulwp->ul_stksiz = stksize;
591 	}
592 	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
593 	ulwp->ul_ix = -1;
594 	ulwp->ul_errnop = &ulwp->ul_errno;
595 
596 	lwp_flags = LWP_SUSPENDED;
597 	if (flags & (THR_DETACHED|THR_DAEMON)) {
598 		flags |= THR_DETACHED;
599 		lwp_flags |= LWP_DETACHED;
600 	}
601 	if (flags & THR_DAEMON)
602 		lwp_flags |= LWP_DAEMON;
603 
604 	/* creating a thread: enforce mt-correctness in mutex_lock() */
605 	self->ul_async_safe = 1;
606 
607 	/* per-thread copies of global variables, for speed */
608 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
609 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
610 	ulwp->ul_error_detection = self->ul_error_detection;
611 	ulwp->ul_async_safe = self->ul_async_safe;
612 	ulwp->ul_max_spinners = self->ul_max_spinners;
613 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
614 	ulwp->ul_queue_spin = self->ul_queue_spin;
615 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
616 	ulwp->ul_misaligned = self->ul_misaligned;
617 
618 	/* new thread inherits creating thread's scheduling parameters */
619 	ulwp->ul_policy = self->ul_policy;
620 	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
621 	ulwp->ul_cid = self->ul_cid;
622 	ulwp->ul_rtclassid = self->ul_rtclassid;
623 
624 	ulwp->ul_primarymap = self->ul_primarymap;
625 	ulwp->ul_self = ulwp;
626 	ulwp->ul_uberdata = udp;
627 
628 	/* debugger support */
629 	ulwp->ul_usropts = flags;
630 
631 #ifdef __sparc
632 	/*
633 	 * We cache several instructions in the thread structure for use
634 	 * by the fasttrap DTrace provider. When changing this, read the
635 	 * comment in fasttrap.h for all the other places that must
636 	 * be changed.
637 	 */
638 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
639 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
640 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
641 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
642 #endif
643 
644 	ulwp->ul_startpc = func;
645 	ulwp->ul_startarg = arg;
646 	_fpinherit(ulwp);
647 	/*
648 	 * Defer signals on the new thread until its TLS constructors
649 	 * have been called.  _thrp_setup() will call sigon() after
650 	 * it has called tls_setup().
651 	 */
652 	ulwp->ul_sigdefer = 1;
653 
654 	error = setup_context(&uc, _thrp_setup, ulwp,
655 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
656 	if (error != 0 && stk != NULL)	/* inaccessible stack */
657 		error = EFAULT;
658 
659 	/*
660 	 * Call enter_critical() to avoid being suspended until we
661 	 * have linked the new thread into the proper lists.
662 	 * This is necessary because forkall() and fork1() must
663 	 * suspend all threads and they must see a complete list.
664 	 */
665 	enter_critical(self);
666 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
667 	if (error != 0 ||
668 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
		/*
		 * Failure: mark the unused ulwp as dead, detached and
		 * without an lwp so that dead_and_buried() accepts it,
		 * then give it back for reuse.
		 */
669 		exit_critical(self);
670 		ulwp->ul_lwpid = (lwpid_t)(-1);
671 		ulwp->ul_dead = 1;
672 		ulwp->ul_detached = 1;
673 		lmutex_lock(&udp->link_lock);
674 		ulwp_free(ulwp);
675 		lmutex_unlock(&udp->link_lock);
676 		return (error);
677 	}
678 	self->ul_nocancel = 0;	/* cancellation is now possible */
679 	udp->uberflags.uf_mt = 1;
680 	if (new_thread)
681 		*new_thread = tid;
682 	if (flags & THR_DETACHED)
683 		ulwp->ul_detached = 1;
684 	ulwp->ul_lwpid = tid;
685 	ulwp->ul_stop = TSTP_REGULAR;
686 	if (flags & THR_SUSPENDED)
687 		ulwp->ul_created = 1;
688 
	/* link the new ulwp into the circular all_lwps list and the hash */
689 	lmutex_lock(&udp->link_lock);
690 	ulwp->ul_forw = udp->all_lwps;
691 	ulwp->ul_back = udp->all_lwps->ul_back;
692 	ulwp->ul_back->ul_forw = ulwp;
693 	ulwp->ul_forw->ul_back = ulwp;
694 	hash_in(ulwp, udp);
695 	udp->nthreads++;
696 	if (flags & THR_DAEMON)
697 		udp->ndaemons++;
698 	if (flags & THR_NEW_LWP)
699 		thr_concurrency++;
700 	__libc_threaded = 1;		/* inform stdio */
701 	lmutex_unlock(&udp->link_lock);
702 
703 	if (__td_event_report(self, TD_CREATE, udp)) {
704 		self->ul_td_evbuf.eventnum = TD_CREATE;
705 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
706 		tdb_event(TD_CREATE, udp);
707 	}
708 
709 	exit_critical(self);
710 
	/* the lwp was created with LWP_SUSPENDED; start it unless asked not to */
711 	if (!(flags & THR_SUSPENDED))
712 		(void) _thrp_continue(tid, TSTP_REGULAR);
713 
714 	return (0);
715 }
716 
717 int
718 thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
719 	long flags, thread_t *new_thread)
720 {
721 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
722 }
723 
724 /*
725  * A special cancellation cleanup hook for DCE.
726  * cleanuphndlr, when it is not NULL, will contain a callback
727  * function to be called before a thread is terminated in
728  * thr_exit() as a result of being cancelled.
729  */
730 static void (*cleanuphndlr)(void) = NULL;
731 
732 /*
733  * _pthread_setcleanupinit: sets the cleanup hook.
734  */
735 int
736 _pthread_setcleanupinit(void (*func)(void))
737 {
738 	cleanuphndlr = func;
739 	return (0);
740 }
741 
742 void
743 _thrp_exit()
744 {
745 	ulwp_t *self = curthread;
746 	uberdata_t *udp = self->ul_uberdata;
747 	ulwp_t *replace = NULL;
748 
749 	if (__td_event_report(self, TD_DEATH, udp)) {
750 		self->ul_td_evbuf.eventnum = TD_DEATH;
751 		tdb_event(TD_DEATH, udp);
752 	}
753 
754 	ASSERT(self->ul_sigdefer != 0);
755 
756 	lmutex_lock(&udp->link_lock);
757 	udp->nthreads--;
758 	if (self->ul_usropts & THR_NEW_LWP)
759 		thr_concurrency--;
760 	if (self->ul_usropts & THR_DAEMON)
761 		udp->ndaemons--;
762 	else if (udp->nthreads == udp->ndaemons) {
763 		/*
764 		 * We are the last non-daemon thread exiting.
765 		 * Exit the process.  We retain our TSD and TLS so
766 		 * that atexit() application functions can use them.
767 		 */
768 		lmutex_unlock(&udp->link_lock);
769 		exit(0);
770 		thr_panic("_thrp_exit(): exit(0) returned");
771 	}
772 	lmutex_unlock(&udp->link_lock);
773 
774 	tsd_exit();		/* deallocate thread-specific data */
775 	tls_exit();		/* deallocate thread-local storage */
776 	heldlock_exit();	/* deal with left-over held locks */
777 
778 	/* block all signals to finish exiting */
779 	block_all_signals(self);
780 	/* also prevent ourself from being suspended */
781 	enter_critical(self);
782 	rwl_free(self);
783 	lmutex_lock(&udp->link_lock);
784 	ulwp_free(self);
785 	(void) ulwp_lock(self, udp);
786 
787 	if (self->ul_mapsiz && !self->ul_detached) {
788 		/*
789 		 * We want to free the stack for reuse but must keep
790 		 * the ulwp_t struct for the benefit of thr_join().
791 		 * For this purpose we allocate a replacement ulwp_t.
792 		 */
793 		if ((replace = udp->ulwp_replace_free) == NULL)
794 			replace = lmalloc(REPLACEMENT_SIZE);
795 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
796 			udp->ulwp_replace_last = NULL;
797 	}
798 
799 	if (udp->all_lwps == self)
800 		udp->all_lwps = self->ul_forw;
801 	if (udp->all_lwps == self)
802 		udp->all_lwps = NULL;
803 	else {
804 		self->ul_forw->ul_back = self->ul_back;
805 		self->ul_back->ul_forw = self->ul_forw;
806 	}
807 	self->ul_forw = self->ul_back = NULL;
808 #if defined(THREAD_DEBUG)
809 	/* collect queue lock statistics before marking ourself dead */
810 	record_spin_locks(self);
811 #endif
812 	self->ul_dead = 1;
813 	self->ul_pleasestop = 0;
814 	if (replace != NULL) {
815 		int ix = self->ul_ix;		/* the hash index */
816 		(void) memcpy(replace, self, REPLACEMENT_SIZE);
817 		replace->ul_self = replace;
818 		replace->ul_next = NULL;	/* clone not on stack list */
819 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
820 		replace->ul_replace = 1;	/* requires clone to be freed */
821 		hash_out_unlocked(self, ix, udp);
822 		hash_in_unlocked(replace, ix, udp);
823 		ASSERT(!(self->ul_detached));
824 		self->ul_detached = 1;		/* this frees the stack */
825 		self->ul_schedctl = NULL;
826 		self->ul_schedctl_called = &udp->uberflags;
827 		set_curthread(self = replace);
828 		/*
829 		 * Having just changed the address of curthread, we
830 		 * must reset the ownership of the locks we hold so
831 		 * that assertions will not fire when we release them.
832 		 */
833 		udp->link_lock.mutex_owner = (uintptr_t)self;
834 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
835 		/*
836 		 * NOTE:
837 		 * On i386, %gs still references the original, not the
838 		 * replacement, ulwp structure.  Fetching the replacement
839 		 * curthread pointer via %gs:0 works correctly since the
840 		 * original ulwp structure will not be reallocated until
841 		 * this lwp has completed its lwp_exit() system call (see
842 		 * dead_and_buried()), but from here on out, we must make
843 		 * no references to %gs:<offset> other than %gs:0.
844 		 */
845 	}
846 	/*
847 	 * Put non-detached terminated threads in the all_zombies list.
848 	 */
849 	if (!self->ul_detached) {
850 		udp->nzombies++;
851 		if (udp->all_zombies == NULL) {
852 			ASSERT(udp->nzombies == 1);
853 			udp->all_zombies = self->ul_forw = self->ul_back = self;
854 		} else {
855 			self->ul_forw = udp->all_zombies;
856 			self->ul_back = udp->all_zombies->ul_back;
857 			self->ul_back->ul_forw = self;
858 			self->ul_forw->ul_back = self;
859 		}
860 	}
861 	/*
862 	 * Notify everyone waiting for this thread.
863 	 */
864 	ulwp_broadcast(self);
865 	(void) ulwp_unlock(self, udp);
866 	/*
867 	 * Prevent any more references to the schedctl data.
868 	 * We are exiting and continue_fork() may not find us.
869 	 * Do this just before dropping link_lock, since fork
870 	 * serializes on link_lock.
871 	 */
872 	self->ul_schedctl = NULL;
873 	self->ul_schedctl_called = &udp->uberflags;
874 	lmutex_unlock(&udp->link_lock);
875 
876 	ASSERT(self->ul_critical == 1);
877 	ASSERT(self->ul_preempt == 0);
878 	_lwp_terminate();	/* never returns */
879 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
880 }
881 
#if defined(THREAD_DEBUG)
/*
 * Debug-only: when thread_queue_dump is non-zero, call
 * record_spin_locks() on every lwp in the circular all_lwps list
 * while holding link_lock.  Does nothing if the list is empty.
 *
 * The definition uses (void) rather than an empty parameter list so
 * that it provides a prototype.
 */
void
collect_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;

	if (thread_queue_dump) {
		lmutex_lock(&udp->link_lock);
		if ((ulwp = udp->all_lwps) != NULL) {
			do {
				record_spin_locks(ulwp);
			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
		}
		lmutex_unlock(&udp->link_lock);
	}
}
#endif
900 
/*
 * Common entry for thread termination, shared by thr_exit() (unwind != 0)
 * and _thrp_terminate() (unwind == 0).
 *
 * status	the thread's exit value, stored in ul_rval
 * unwind	non-zero sets ul_unwind before unwinding starts
 *
 * Disables cancellation, runs the DCE cleanup hook if the thread is
 * exiting because it was cancelled, blocks and defers signals, and
 * then hands control to _thrp_unwind().  Never returns.
 */
901 static void __NORETURN
902 _thrp_exit_common(void *status, int unwind)
903 {
904 	ulwp_t *self = curthread;
	/* sampled now because ul_cancel_pending is cleared below */
905 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
906 
907 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
908 
909 	/*
910 	 * Disable cancellation and call the special DCE cancellation
911 	 * cleanup hook if it is enabled.  Do nothing else before calling
912 	 * the DCE cancellation cleanup hook; it may call longjmp() and
913 	 * never return here.
914 	 */
915 	self->ul_cancel_disabled = 1;
916 	self->ul_cancel_async = 0;
917 	self->ul_save_async = 0;
918 	self->ul_cancelable = 0;
919 	self->ul_cancel_pending = 0;
920 	set_cancel_pending_flag(self, 1);
921 	if (cancelled && cleanuphndlr != NULL)
922 		(*cleanuphndlr)();
923 
924 	/*
925 	 * Block application signals while we are exiting.
926 	 * We call out to C++, TSD, and TLS destructors while exiting
927 	 * and these are application-defined, so we cannot be assured
928 	 * that they won't reset the signal mask.  We use sigoff() to
929 	 * defer any signals that may be received as a result of this
930 	 * bad behavior.  Such signals will be lost to the process
931 	 * when the thread finishes exiting.
932 	 */
933 	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
934 	sigoff(self);
935 
936 	self->ul_rval = status;
937 
938 	/*
939 	 * If thr_exit is being called from the places where
940 	 * C++ destructors are to be called such as cancellation
941 	 * points, then set this flag. It is checked in _t_cancel()
942 	 * to decide whether _ex_unwind() is to be called or not.
943 	 */
944 	if (unwind)
945 		self->ul_unwind = 1;
946 
947 	/*
948 	 * _thrp_unwind() will eventually call _thrp_exit().
949 	 * It never returns.
950 	 */
951 	_thrp_unwind(NULL);
952 	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");
953 
954 	for (;;)	/* to shut the compiler up about __NORETURN */
955 		continue;
956 }
957 
/*
 * Reached when a thread's start function returns; 'status' is the
 * thread's exit value.  The thread is already at the top of its
 * stack, so the common exit path is entered with unwinding turned
 * off (unwind == 0).
 */
void
_thrp_terminate(void *status)
{
	const int unwind = 0;

	_thrp_exit_common(status, unwind);
}
967 
/*
 * Terminate the calling thread with exit value 'status'.
 * Enters the common exit path with unwinding requested (unwind == 1).
 * pthread_exit and _thr_exit are weak aliases for this function.
 */
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
void
thr_exit(void *status)
{
	const int unwind = 1;

	_thrp_exit_common(status, unwind);
}
975 
/*
 * Common implementation used in this file by thr_join(), pthread_join()
 * and pthread_detach().
 *
 * tid:		id of the thread to wait for, passed straight to the
 *		lwp wait primitive.
 * departed:	if non-NULL, receives the id of the lwp that was waited for.
 * status:	if non-NULL, receives the departed thread's ul_rval, or
 *		NULL when the library has no record of the lwp.
 * do_cancel:	non-zero selects lwp_wait(); zero selects __lwp_wait(),
 *		which is retried for as long as it returns EINTR.
 *		(Presumably lwp_wait() is the cancellation-point variant;
 *		confirm against its definition.)
 *
 * Returns 0 on success, or the error returned by the wait primitive.
 */
976 int
977 _thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
978 {
979 	uberdata_t *udp = curthread->ul_uberdata;
980 	mutex_t *mp;
981 	void *rval;
982 	thread_t found;
983 	ulwp_t *ulwp;
984 	ulwp_t **ulwpp;
985 	int replace;
986 	int error;
987 
988 	if (do_cancel)
989 		error = lwp_wait(tid, &found);
990 	else {
991 		while ((error = __lwp_wait(tid, &found)) == EINTR)
992 			;
993 	}
994 	if (error)
995 		return (error);
996 
997 	/*
998 	 * We must hold link_lock to avoid a race condition with find_stack().
999 	 */
1000 	lmutex_lock(&udp->link_lock);
1001 	if ((ulwpp = find_lwpp(found)) == NULL) {
1002 		/*
1003 		 * lwp_wait() found an lwp that the library doesn't know
1004 		 * about.  It must have been created with _lwp_create().
1005 		 * Just return its lwpid; we can't know its status.
1006 		 */
1007 		lmutex_unlock(&udp->link_lock);
1008 		rval = NULL;
1009 	} else {
1010 		/*
1011 		 * Remove ulwp from the hash table.
1012 		 */
1013 		ulwp = *ulwpp;
1014 		*ulwpp = ulwp->ul_hash;
1015 		ulwp->ul_hash = NULL;
1016 		/*
1017 		 * Remove ulwp from all_zombies list.
		 * The first test moves the list head off ulwp.  If the
		 * head still equals ulwp afterwards, ulwp pointed at
		 * itself and was the only entry, so the list becomes
		 * empty; otherwise ulwp is unlinked from its neighbors.
1018 		 */
1019 		ASSERT(udp->nzombies >= 1);
1020 		if (udp->all_zombies == ulwp)
1021 			udp->all_zombies = ulwp->ul_forw;
1022 		if (udp->all_zombies == ulwp)
1023 			udp->all_zombies = NULL;
1024 		else {
1025 			ulwp->ul_forw->ul_back = ulwp->ul_back;
1026 			ulwp->ul_back->ul_forw = ulwp->ul_forw;
1027 		}
1028 		ulwp->ul_forw = ulwp->ul_back = NULL;
1029 		udp->nzombies--;
1030 		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
1031 		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
1032 		/*
1033 		 * We can't call ulwp_unlock(ulwp) after we set
1034 		 * ulwp->ul_ix = -1 so we have to get a pointer to the
1035 		 * ulwp's hash table mutex now in order to unlock it below.
1036 		 */
1037 		mp = ulwp_mutex(ulwp, udp);
1038 		ulwp->ul_lwpid = (lwpid_t)(-1);
1039 		ulwp->ul_ix = -1;
		/* capture what we still need before dropping the lock */
1040 		rval = ulwp->ul_rval;
1041 		replace = ulwp->ul_replace;
1042 		lmutex_unlock(mp);
1043 		if (replace) {
			/* append ulwp to the tail of the ulwp_replace_free list */
1044 			ulwp->ul_next = NULL;
1045 			if (udp->ulwp_replace_free == NULL)
1046 				udp->ulwp_replace_free =
1047 				    udp->ulwp_replace_last = ulwp;
1048 			else {
1049 				udp->ulwp_replace_last->ul_next = ulwp;
1050 				udp->ulwp_replace_last = ulwp;
1051 			}
1052 		}
1053 		lmutex_unlock(&udp->link_lock);
1054 	}
1055 
1056 	if (departed != NULL)
1057 		*departed = found;
1058 	if (status != NULL)
1059 		*status = rval;
1060 	return (0);
1061 }
1062 
1063 int
1064 thr_join(thread_t tid, thread_t *departed, void **status)
1065 {
1066 	int error = _thrp_join(tid, departed, status, 1);
1067 	return ((error == EINVAL)? ESRCH : error);
1068 }
1069 
/*
 * pthread_join() versus Solaris thr_join():
 *   - There is no "departed" argument, because pthread_join() does
 *     not return the id of the thread that departed.
 *   - EINVAL is returned if tid refers to a detached thread
 *     (thr_join() converts EINVAL to ESRCH; we do not).
 * A tid of zero is rejected with ESRCH without waiting at all.
 */
#pragma weak _pthread_join = pthread_join
int
pthread_join(pthread_t tid, void **status)
{
	if (tid == 0)
		return (ESRCH);
	return (_thrp_join(tid, NULL, status, 1));
}
1082 
1083 int
1084 pthread_detach(pthread_t tid)
1085 {
1086 	uberdata_t *udp = curthread->ul_uberdata;
1087 	ulwp_t *ulwp;
1088 	ulwp_t **ulwpp;
1089 	int error = 0;
1090 
1091 	if ((ulwpp = find_lwpp(tid)) == NULL)
1092 		return (ESRCH);
1093 	ulwp = *ulwpp;
1094 
1095 	if (ulwp->ul_dead) {
1096 		ulwp_unlock(ulwp, udp);
1097 		error = _thrp_join(tid, NULL, NULL, 0);
1098 	} else {
1099 		error = __lwp_detach(tid);
1100 		ulwp->ul_detached = 1;
1101 		ulwp->ul_usropts |= THR_DETACHED;
1102 		ulwp_unlock(ulwp, udp);
1103 	}
1104 	return (error);
1105 }
1106 
/*
 * Test whether the string 'ev' has the form "<match>=<anything>".
 * Returns a pointer to the character following the '=' when it does,
 * and NULL when 'ev' does not begin with 'match' or when 'match' is
 * not immediately followed by '='.
 */
static const char *
ematch(const char *ev, const char *match)
{
	for (; *match != '\0'; match++, ev++) {
		if (*ev != *match)
			return (NULL);
	}
	if (*ev != '=')
		return (NULL);
	return (ev + 1);
}
1120 
/*
 * Evaluate an environment entry of the form "<match>=<decimal digits>".
 *
 * ev:		the environment string to examine.
 * match:	the variable name that must precede the '='.
 * limit:	upper bound for the result; parsing stops as soon as the
 *		accumulated value exceeds it and 'limit' is returned.
 *		The callers in this file pass limits no larger than
 *		1000000, so (limit * 10 + 9) cannot overflow an int.
 *
 * Returns -1 when the name does not match or when the value contains
 * a character that is not a decimal digit; otherwise the value,
 * clamped to 'limit'.  An empty value ("NAME=") yields 0.
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *digits;

	if ((digits = ematch(ev, match)) != NULL) {
		int c;

		/*
		 * Fetch each byte through unsigned char.  Passing a
		 * negative value (a plain char with the high bit set)
		 * to isdigit() is undefined behavior.
		 */
		for (val = 0; (c = (unsigned char)*digits) != '\0'; digits++) {
			if (!isdigit(c)) {
				val = -1;
				break;
			}
			val = val * 10 + (c - '0');
			if (val > limit) {
				val = limit;
				break;
			}
		}
	}
	return (val);
}
1143 
1144 static void
1145 etest(const char *ev)
1146 {
1147 	int value;
1148 
1149 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1150 		thread_queue_spin = value;
1151 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1152 		thread_adaptive_spin = value;
1153 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1154 		thread_max_spinners = value;
1155 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1156 		thread_queue_fifo = value;
1157 #if defined(THREAD_DEBUG)
1158 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1159 		thread_queue_verify = value;
1160 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1161 		thread_queue_dump = value;
1162 #endif
1163 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1164 		thread_stack_cache = value;
1165 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1166 		thread_cond_wait_defer = value;
1167 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1168 		thread_error_detection = value;
1169 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1170 		thread_async_safe = value;
1171 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1172 		thread_door_noreserve = value;
1173 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1174 		thread_locks_misaligned = value;
1175 }
1176 
/*
 * Scan the environment for entries whose names begin with "_THREAD_"
 * and hand the remainder of each such entry to etest().
 * Names beginning with "LIBTHREAD_" are accepted as well, for
 * compatibility with the past.
 * Nothing is done when _environ is NULL.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* look at the first character before paying for strncmp() */
		switch (*entry) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1200 
1201 /* PROBE_SUPPORT begin */
/*
 * Declared weak: libc_init() compares the symbol against NULL and
 * calls it only when it is present.
 */
1202 #pragma weak __tnf_probe_notify
1203 extern void __tnf_probe_notify(void);
1204 /* PROBE_SUPPORT end */
1205 
1206 /* same as atexit() but private to the library */
1207 extern int _atexit(void (*)(void));
1208 
1209 /* same as _cleanup() but private to the library */
1210 extern void __cleanup(void);
1211 
/* atfork() setup; libc_init() calls this for every link map */
1212 extern void atfork_init(void);
1213 
1214 #ifdef __amd64
/*
 * Called first thing in libc_init() on amd64 to gather the cache layout
 * information used by the optimized assembler string/memory functions.
 */
1215 extern void __proc64id(void);
1216 #endif
1217 
1218 /*
1219  * libc_init() is called by ld.so.1 for library initialization.
1220  * We perform minimal initialization; enough to work with the main thread.
 *
 * Outline of what the code below does:
 *   1. Register __cleanup() with _atexit() (done for every instance of libc).
 *   2. If a thread structure already exists (oldself) and either it was
 *	set up on the primary link map or this instance is not on the
 *	primary link map, do only the per-link-map setup and return.
 *   3. Otherwise allocate the main thread's ulwp_t (preceded by its
 *	static TLS), fill it in from the current context and the stack
 *	resource limit, inherit state from oldself if there is one, and
 *	make it the current thread.
 *   4. Finish with lock, atfork, signal, environment-variable, TLS,
 *	sigev and aio initialization, and set __threaded to 1.
1221  */
1222 void
1223 libc_init(void)
1224 {
1225 	uberdata_t *udp = &__uberdata;
1226 	ulwp_t *oldself = __curthread();
1227 	ucontext_t uc;
1228 	ulwp_t *self;
1229 	struct rlimit rl;
1230 	caddr_t data;
1231 	size_t tls_size;
1232 	int setmask;
1233 
1234 	/*
1235 	 * For the initial stage of initialization, we must be careful
1236 	 * not to call any function that could possibly call _cerror().
1237 	 * For this purpose, we call only the raw system call wrappers.
1238 	 */
1239 
1240 #ifdef __amd64
1241 	/*
1242 	 * Gather information about cache layouts for optimized
1243 	 * AMD and Intel assembler strfoo() and memfoo() functions.
1244 	 */
1245 	__proc64id();
1246 #endif
1247 
1248 	/*
1249 	 * Every libc, regardless of which link map, must register __cleanup().
1250 	 */
1251 	(void) _atexit(__cleanup);
1252 
1253 	/*
1254 	 * We keep our uberdata on one of (a) the first alternate link map
1255 	 * or (b) the primary link map.  We switch to the primary link map
1256 	 * and stay there once we see it.  All intermediate link maps are
1257 	 * subject to being unloaded at any time.
1258 	 */
1259 	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
		/*
		 * A usable thread structure already exists; this instance
		 * only picks up the shared tdb_bootstrap pointer and does
		 * its own per-link-map setup.
		 */
1260 		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1261 		mutex_setup();
1262 		atfork_init();	/* every link map needs atfork() processing */
1263 		init_progname();
1264 		return;
1265 	}
1266 
1267 	/*
1268 	 * To establish the main stack information, we have to get our context.
1269 	 * This is also convenient to use for getting our signal mask.
1270 	 */
1271 	uc.uc_flags = UC_ALL;
1272 	(void) __getcontext(&uc);
1273 	ASSERT(uc.uc_link == NULL);
1274 
	/*
	 * One allocation holds tls_size bytes of static TLS followed
	 * by the ulwp_t itself; 'self' points just past the TLS bytes.
	 */
1275 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1276 	ASSERT(primary_link_map || tls_size == 0);
1277 	data = lmalloc(sizeof (ulwp_t) + tls_size);
1278 	if (data == NULL)
1279 		thr_panic("cannot allocate thread structure for main thread");
1280 	/* LINTED pointer cast may result in improper alignment */
1281 	self = (ulwp_t *)(data + tls_size);
1282 	init_hash_table[0].hash_bucket = self;
1283 
1284 	self->ul_sigmask = uc.uc_sigmask;
1285 	delete_reserved_signals(&self->ul_sigmask);
1286 	/*
1287 	 * Are the old and new sets different?
1288 	 * (This can happen if we are currently blocking SIGCANCEL.)
1289 	 * If so, we must explicitly set our signal mask, below.
1290 	 */
1291 	setmask =
1292 	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1293 	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
1294 	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
1295 	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));
1296 
1297 #ifdef __sparc
1298 	/*
1299 	 * We cache several instructions in the thread structure for use
1300 	 * by the fasttrap DTrace provider. When changing this, read the
1301 	 * comment in fasttrap.h for all the other places that must
1302 	 * be changed.
1303 	 */
1304 	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1305 	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1306 	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1307 	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1308 #endif
1309 
	/*
	 * The stack top comes from the context; the stack size is the
	 * current RLIMIT_STACK value and the base is derived from both.
	 * (The RLIM_INFINITY case is dealt with further below.)
	 */
1310 	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1311 	(void) getrlimit(RLIMIT_STACK, &rl);
1312 	self->ul_stksiz = rl.rlim_cur;
1313 	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1314 
1315 	self->ul_forw = self->ul_back = self;
1316 	self->ul_hash = NULL;
1317 	self->ul_ix = 0;
1318 	self->ul_lwpid = 1; /* _lwp_self() */
1319 	self->ul_main = 1;
1320 	self->ul_self = self;
1321 	self->ul_policy = -1;		/* initialize only when needed */
1322 	self->ul_pri = 0;
1323 	self->ul_cid = 0;
1324 	self->ul_rtclassid = -1;
1325 	self->ul_uberdata = udp;
1326 	if (oldself != NULL) {
1327 		int i;
1328 
		/*
		 * We are replacing a thread structure that was set up
		 * on an earlier link map; carry over its TSD and TLS
		 * pointers and its uberdata.
		 */
1329 		ASSERT(primary_link_map);
1330 		ASSERT(oldself->ul_main == 1);
1331 		self->ul_stsd = oldself->ul_stsd;
1332 		for (i = 0; i < TSD_NFAST; i++)
1333 			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1334 		self->ul_tls = oldself->ul_tls;
1335 		/*
1336 		 * Retrieve all pointers to uberdata allocated
1337 		 * while running on previous link maps.
1338 		 * We would like to do a structure assignment here, but
1339 		 * gcc turns structure assignments into calls to memcpy(),
1340 		 * a function exported from libc.  We can't call any such
1341 		 * external functions until we establish curthread, below,
1342 		 * so we just call our private version of memcpy().
1343 		 */
1344 		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
1345 		/*
1346 		 * These items point to global data on the primary link map.
1347 		 */
1348 		udp->thr_hash_table = init_hash_table;
1349 		udp->sigacthandler = sigacthandler;
1350 		udp->tdb.tdb_events = tdb_events;
1351 		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1352 		ASSERT(udp->lwp_stacks == NULL);
1353 		ASSERT(udp->ulwp_freelist == NULL);
1354 		ASSERT(udp->ulwp_replace_free == NULL);
1355 		ASSERT(udp->hash_size == 1);
1356 	}
1357 	udp->all_lwps = self;
1358 	udp->ulwp_one = self;
1359 	udp->pid = getpid();
1360 	udp->nthreads = 1;
1361 	/*
1362 	 * In every link map, tdb_bootstrap points to the same piece of
1363 	 * allocated memory.  When the primary link map is initialized,
1364 	 * the allocated memory is assigned a pointer to the one true
1365 	 * uberdata.  This allows libc_db to initialize itself regardless
1366 	 * of which instance of libc it finds in the address space.
1367 	 */
1368 	if (udp->tdb_bootstrap == NULL)
1369 		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1370 	__tdb_bootstrap = udp->tdb_bootstrap;
1371 	if (primary_link_map) {
1372 		self->ul_primarymap = 1;
1373 		udp->primary_map = 1;
1374 		*udp->tdb_bootstrap = udp;
1375 	}
1376 	/*
1377 	 * Cancellation can't happen until:
1378 	 *	pthread_cancel() is called
1379 	 * or:
1380 	 *	another thread is created
1381 	 * For now, as a single-threaded process, set the flag that tells
1382 	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1383 	 */
1384 	self->ul_nocancel = 1;
1385 
1386 #if defined(__amd64)
1387 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1388 #elif defined(__i386)
1389 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1390 #endif	/* __i386 || __amd64 */
1391 	set_curthread(self);		/* redundant on i386 */
1392 	/*
1393 	 * Now curthread is established and it is safe to call any
1394 	 * function in libc except one that uses thread-local storage.
1395 	 */
1396 	self->ul_errnop = &errno;
1397 	if (oldself != NULL) {
1398 		/* tls_size was zero when oldself was allocated */
1399 		lfree(oldself, sizeof (ulwp_t));
1400 	}
1401 	mutex_setup();
1402 	atfork_init();
1403 	signal_init();
1404 
1405 	/*
1406 	 * If the stack is unlimited, we set the size to zero to disable
1407 	 * stack checking.
1408 	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1409 	 */
1410 	if (self->ul_stksiz == RLIM_INFINITY) {
1411 		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1412 		self->ul_ustack.ss_size = 0;
1413 	} else {
1414 		self->ul_ustack.ss_sp = self->ul_stk;
1415 		self->ul_ustack.ss_size = self->ul_stksiz;
1416 	}
1417 	self->ul_ustack.ss_flags = 0;
1418 	(void) setustack(&self->ul_ustack);
1419 
1420 	/*
1421 	 * Get the variables that affect thread behavior from the environment.
1422 	 */
1423 	set_thread_vars();
1424 	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1425 	udp->thread_stack_cache = thread_stack_cache;
1426 
1427 	/*
1428 	 * Make per-thread copies of global variables, for speed.
1429 	 */
1430 	self->ul_queue_fifo = (char)thread_queue_fifo;
1431 	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1432 	self->ul_error_detection = (char)thread_error_detection;
1433 	self->ul_async_safe = (char)thread_async_safe;
1434 	self->ul_door_noreserve = (char)thread_door_noreserve;
1435 	self->ul_misaligned = (char)thread_locks_misaligned;
1436 	self->ul_max_spinners = (uint8_t)thread_max_spinners;
1437 	self->ul_adaptive_spin = thread_adaptive_spin;
1438 	self->ul_queue_spin = thread_queue_spin;
1439 
1440 #if defined(__sparc) && !defined(_LP64)
1441 	if (self->ul_misaligned) {
1442 		/*
1443 		 * Tell the kernel to fix up ldx/stx instructions that
1444 		 * refer to non-8-byte aligned data instead of giving
1445 		 * the process an alignment trap and generating SIGBUS.
1446 		 *
1447 		 * Programs compiled for 32-bit sparc with the Studio SS12
1448 		 * compiler get this done for them automatically (in _init()).
1449 		 * We do it here for the benefit of programs compiled with
1450 		 * other compilers, like gcc.
1451 		 *
1452 		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
1453 		 * environment variable horrible hack to work.
1454 		 */
1455 		extern void _do_fix_align(void);
1456 		_do_fix_align();
1457 	}
1458 #endif
1459 
1460 	/*
1461 	 * When we have initialized the primary link map, inform
1462 	 * the dynamic linker about our interface functions.
1463 	 * Set up our pointer to the program name.
1464 	 */
1465 	if (self->ul_primarymap)
1466 		_ld_libc((void *)rtld_funcs);
1467 	init_progname();
1468 
1469 	/*
1470 	 * Defer signals until TLS constructors have been called.
1471 	 */
1472 	sigoff(self);
1473 	tls_setup();
1474 	sigon(self);
	/* the mask computed above differs from the one we inherited */
1475 	if (setmask)
1476 		(void) restore_signals(self);
1477 
1478 	/*
1479 	 * Make private copies of __xpg4 and __xpg6 so libc can test
1480 	 * them after this point without invoking the dynamic linker.
1481 	 */
1482 	libc__xpg4 = __xpg4;
1483 	libc__xpg6 = __xpg6;
1484 
1485 	/* PROBE_SUPPORT begin */
1486 	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1487 		__tnf_probe_notify();
1488 	/* PROBE_SUPPORT end */
1489 
1490 	init_sigev_thread();
1491 	init_aio();
1492 
1493 	/*
1494 	 * We need to reset __threaded dynamically at runtime, so that
1495 	 * __threaded can be bound to __threaded outside libc which may not
1496 	 * have initial value of 1 (without a copy relocation in a.out).
1497 	 */
1498 	__threaded = 1;
1499 }
1500 
1501 #pragma fini(libc_fini)
1502 void
1503 libc_fini()
1504 {
1505 	/*
1506 	 * If we are doing fini processing for the instance of libc
1507 	 * on the first alternate link map (this happens only when
1508 	 * the dynamic linker rejects a bad audit library), then clear
1509 	 * __curthread().  We abandon whatever memory was allocated by
1510 	 * lmalloc() while running on this alternate link-map but we
1511 	 * don't care (and can't find the memory in any case); we just
1512 	 * want to protect the application from this bad audit library.
1513 	 * No fini processing is done by libc in the normal case.
1514 	 */
1515 
1516 	uberdata_t *udp = curthread->ul_uberdata;
1517 
1518 	if (udp->primary_map == 0 && udp == &__uberdata)
1519 		set_curthread(NULL);
1520 }
1521 
1522 /*
1523  * finish_init is called when we are about to become multi-threaded,
1524  * that is, on the first call to thr_create().
1525  */
1526 void
1527 finish_init()
1528 {
1529 	ulwp_t *self = curthread;
1530 	uberdata_t *udp = self->ul_uberdata;
1531 	thr_hash_table_t *htp;
1532 	void *data;
1533 	int i;
1534 
1535 	/*
1536 	 * No locks needed here; we are single-threaded on the first call.
1537 	 * We can be called only after the primary link map has been set up.
1538 	 */
1539 	ASSERT(self->ul_primarymap);
1540 	ASSERT(self == udp->ulwp_one);
1541 	ASSERT(!udp->uberflags.uf_mt);
1542 	ASSERT(udp->hash_size == 1);
1543 
1544 	/*
1545 	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1546 	 */
1547 	update_sched(self);
1548 
1549 	/*
1550 	 * Allocate the queue_head array if not already allocated.
1551 	 */
1552 	if (udp->queue_head == NULL)
1553 		queue_alloc();
1554 
1555 	/*
1556 	 * Now allocate the thread hash table.
1557 	 */
1558 	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1559 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1560 	    == MAP_FAILED)
1561 		thr_panic("cannot allocate thread hash table");
1562 
1563 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1564 	udp->hash_size = HASHTBLSZ;
1565 	udp->hash_mask = HASHTBLSZ - 1;
1566 
1567 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1568 		htp->hash_lock.mutex_flag = LOCK_INITED;
1569 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1570 		htp->hash_cond.cond_magic = COND_MAGIC;
1571 	}
1572 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1573 
1574 	/*
1575 	 * Set up the SIGCANCEL handler for threads cancellation.
1576 	 */
1577 	setup_cancelsig(SIGCANCEL);
1578 
1579 	/*
1580 	 * Arrange to do special things on exit --
1581 	 * - collect queue statistics from all remaining active threads.
1582 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1583 	 * - grab assert_lock to ensure that assertion failures
1584 	 *   and a core dump take precedence over _exit().
1585 	 * (Functions are called in the reverse order of their registration.)
1586 	 */
1587 	(void) _atexit(grab_assert_lock);
1588 #if defined(THREAD_DEBUG)
1589 	(void) _atexit(dump_queue_statistics);
1590 	(void) _atexit(collect_queue_statistics);
1591 #endif
1592 }
1593 
1594 /*
1595  * Used only by postfork1_child(), below.
1596  */
1597 static void
1598 mark_dead_and_buried(ulwp_t *ulwp)
1599 {
1600 	ulwp->ul_dead = 1;
1601 	ulwp->ul_lwpid = (lwpid_t)(-1);
1602 	ulwp->ul_hash = NULL;
1603 	ulwp->ul_ix = -1;
1604 	ulwp->ul_schedctl = NULL;
1605 	ulwp->ul_schedctl_called = NULL;
1606 }
1607 
1608 /*
1609  * This is called from fork1() in the child.
1610  * Reset our data structures to reflect one lwp.
 *
 * In order, the code below:
 *   - resets the thread counts and the multi-threaded flags,
 *   - empties the thread hash table and re-enters the calling thread
 *     under its new lwpid,
 *   - reinitializes callout_lock, ld_lock and the sleep queues,
 *   - runs the post-fork1 hooks of the sigev, aio and tpool subsystems,
 *   - marks every other thread structure dead, freeing the ones that
 *     were active, and disposes of the zombie list,
 *   - trims the stack cache.
1611  */
1612 void
1613 postfork1_child()
1614 {
1615 	ulwp_t *self = curthread;
1616 	uberdata_t *udp = self->ul_uberdata;
1617 	queue_head_t *qp;
1618 	ulwp_t *next;
1619 	ulwp_t *ulwp;
1620 	int i;
1621 
1622 	/* daemon threads shouldn't call fork1(), but oh well... */
1623 	self->ul_usropts &= ~THR_DAEMON;
1624 	udp->nthreads = 1;
1625 	udp->ndaemons = 0;
1626 	udp->uberflags.uf_mt = 0;
1627 	__libc_threaded = 0;
	/* empty every hash bucket, then re-enter ourself under our new lwpid */
1628 	for (i = 0; i < udp->hash_size; i++)
1629 		udp->thr_hash_table[i].hash_bucket = NULL;
1630 	self->ul_lwpid = _lwp_self();
1631 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1632 
1633 	/*
1634 	 * Some thread in the parent might have been suspended
1635 	 * while holding udp->callout_lock or udp->ld_lock.
1636 	 * Reinitialize the child's copies.
1637 	 */
1638 	(void) mutex_init(&udp->callout_lock,
1639 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1640 	(void) mutex_init(&udp->ld_lock,
1641 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1642 
1643 	/* no one in the child is on a sleep queue; reinitialize */
1644 	if ((qp = udp->queue_head) != NULL) {
1645 		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
		/* the first QHASHSIZE entries are typed MX, the rest CV */
1646 		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
1647 			qp->qh_type = (i < QHASHSIZE)? MX : CV;
1648 			qp->qh_lock.mutex_flag = LOCK_INITED;
1649 			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
1650 			qp->qh_hlist = &qp->qh_def_root;
1651 #if defined(THREAD_DEBUG)
1652 			qp->qh_hlen = 1;
1653 			qp->qh_hmax = 1;
1654 #endif
1655 		}
1656 	}
1657 
1658 	/*
1659 	 * Do post-fork1 processing for subsystems that need it.
1660 	 * We need to do this before unmapping all of the abandoned
1661 	 * threads' stacks, below, because the post-fork1 actions
1662 	 * might require access to those stacks.
1663 	 */
1664 	postfork1_child_sigev_aio();
1665 	postfork1_child_sigev_mq();
1666 	postfork1_child_sigev_timer();
1667 	postfork1_child_aio();
1668 	/*
1669 	 * The above subsystems use thread pools, so this action
1670 	 * must be performed after those actions.
1671 	 */
1672 	postfork1_child_tpool();
1673 
1674 	/*
1675 	 * All lwps except ourself are gone.  Mark them so.
1676 	 * First mark all of the lwps that have already been freed.
1677 	 * Then mark and free all of the active lwps except ourself.
1678 	 * Since we are single-threaded, no locks are required here.
1679 	 */
1680 	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1681 		mark_dead_and_buried(ulwp);
1682 	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1683 		mark_dead_and_buried(ulwp);
	/* 'next' is fetched first because the links are cleared in the loop */
1684 	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1685 		next = ulwp->ul_forw;
1686 		ulwp->ul_forw = ulwp->ul_back = NULL;
1687 		mark_dead_and_buried(ulwp);
1688 		tsd_free(ulwp);
1689 		tls_free(ulwp);
1690 		rwl_free(ulwp);
1691 		heldlock_free(ulwp);
1692 		ulwp_free(ulwp);
1693 	}
	/* we are now the only entry on the circular all_lwps list */
1694 	self->ul_forw = self->ul_back = udp->all_lwps = self;
1695 	if (self != udp->ulwp_one)
1696 		mark_dead_and_buried(udp->ulwp_one);
1697 	if ((ulwp = udp->all_zombies) != NULL) {
1698 		ASSERT(udp->nzombies != 0);
		/*
		 * Walk the circular zombie list once.  Zombies with
		 * ul_replace set are appended to the ulwp_replace_free
		 * list.
		 */
1699 		do {
1700 			next = ulwp->ul_forw;
1701 			ulwp->ul_forw = ulwp->ul_back = NULL;
1702 			mark_dead_and_buried(ulwp);
1703 			udp->nzombies--;
1704 			if (ulwp->ul_replace) {
1705 				ulwp->ul_next = NULL;
1706 				if (udp->ulwp_replace_free == NULL) {
1707 					udp->ulwp_replace_free =
1708 					    udp->ulwp_replace_last = ulwp;
1709 				} else {
1710 					udp->ulwp_replace_last->ul_next = ulwp;
1711 					udp->ulwp_replace_last = ulwp;
1712 				}
1713 			}
1714 		} while ((ulwp = next) != udp->all_zombies);
1715 		ASSERT(udp->nzombies == 0);
1716 		udp->all_zombies = NULL;
1717 		udp->nzombies = 0;
1718 	}
1719 	trim_stack_cache(0);
1720 }
1721 
1722 lwpid_t
1723 lwp_self(void)
1724 {
1725 	return (curthread->ul_lwpid);
1726 }
1727 
1728 #pragma weak _ti_thr_self = thr_self
1729 #pragma weak pthread_self = thr_self
1730 thread_t
1731 thr_self()
1732 {
1733 	return (curthread->ul_lwpid);
1734 }
1735 
1736 int
1737 thr_main()
1738 {
1739 	ulwp_t *self = __curthread();
1740 
1741 	return ((self == NULL)? -1 : self->ul_main);
1742 }
1743 
1744 int
1745 _thrp_cancelled(void)
1746 {
1747 	return (curthread->ul_rval == PTHREAD_CANCELED);
1748 }
1749 
1750 int
1751 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1752 {
1753 	stk->ss_sp = (void *)ulwp->ul_stktop;
1754 	stk->ss_size = ulwp->ul_stksiz;
1755 	stk->ss_flags = 0;
1756 	return (0);
1757 }
1758 
1759 #pragma weak _thr_stksegment = thr_stksegment
1760 int
1761 thr_stksegment(stack_t *stk)
1762 {
1763 	return (_thrp_stksegment(curthread, stk));
1764 }
1765 
/*
 * Resume the lwp belonging to 'ulwp' with _lwp_continue(), repeating
 * the call until either the target is no longer marked as stopping
 * itself (ul_stopping is clear) or _lwp_continue() fails with an error
 * other than EINTR.
 *
 * The caller must own fork_lock and the target's ulwp mutex; both are
 * asserted.  Nothing is returned: 'error' is used only to decide
 * whether to leave the loop.
 */
1766 void
1767 force_continue(ulwp_t *ulwp)
1768 {
1769 #if defined(THREAD_DEBUG)
	/* these two are referenced only by the ASSERT()s below */
1770 	ulwp_t *self = curthread;
1771 	uberdata_t *udp = self->ul_uberdata;
1772 #endif
1773 	int error;
1774 	timespec_t ts;
1775 
1776 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1777 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1778 
1779 	for (;;) {
1780 		error = _lwp_continue(ulwp->ul_lwpid);
1781 		if (error != 0 && error != EINTR)
1782 			break;
1783 		error = 0;
1784 		if (ulwp->ul_stopping) {	/* he is stopping himself */
1785 			ts.tv_sec = 0;		/* give him a chance to run */
1786 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1787 			(void) __nanosleep(&ts, NULL);
1788 		}
		/* ul_stopping is tested again; it may have changed while we slept */
1789 		if (!ulwp->ul_stopping)		/* he is running now */
1790 			break;			/* so we are done */
1791 		/*
1792 		 * He is marked as being in the process of stopping
1793 		 * himself.  Loop around and continue him again.
1794 		 * He may not have been stopped the first time.
1795 		 */
1796 	}
1797 }
1798 
1799 /*
1800  * Suspend an lwp with lwp_suspend(), then move it to a safe point,
1801  * that is, to a point where ul_critical and ul_rtld are both zero.
1802  * On return, the ulwp_lock() is dropped as with ulwp_unlock().
1803  * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
1804  * If we have to drop link_lock, we store 1 through link_dropped.
1805  * If the lwp exits before it can be suspended, we return ESRCH.
 *
 * ulwp:	the thread to suspend; must not be the caller and must not
 *		already have ul_stop set (both asserted).
 * whystopped:	exactly one of TSTP_REGULAR, TSTP_MUTATOR or TSTP_FORK
 *		(asserted); it is or'ed into the target's ul_stop or
 *		ul_pleasestop.
 * link_dropped: optional out-parameter, as described above; when
 *		non-NULL it is set to 0 on entry.
 *
 * The caller must own fork_lock and the target's ulwp mutex (asserted).
 * Returns 0 on success or ESRCH.
1806  */
1807 int
1808 safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
1809 {
1810 	ulwp_t *self = curthread;
1811 	uberdata_t *udp = self->ul_uberdata;
	/*
	 * Everything needed to find the target again and to signal and
	 * unlock at the end is captured now, because 'ulwp' may be
	 * re-looked-up (or become NULL) further below.
	 */
1812 	cond_t *cvp = ulwp_condvar(ulwp, udp);
1813 	mutex_t *mp = ulwp_mutex(ulwp, udp);
1814 	thread_t tid = ulwp->ul_lwpid;
1815 	int ix = ulwp->ul_ix;
1816 	int error = 0;
1817 
1818 	ASSERT(whystopped == TSTP_REGULAR ||
1819 	    whystopped == TSTP_MUTATOR ||
1820 	    whystopped == TSTP_FORK);
1821 	ASSERT(ulwp != self);
1822 	ASSERT(!ulwp->ul_stop);
1823 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1824 	ASSERT(MUTEX_OWNED(mp, self));
1825 
1826 	if (link_dropped != NULL)
1827 		*link_dropped = 0;
1828 
1829 	/*
1830 	 * We must grab the target's spin lock before suspending it.
1831 	 * See the comments below and in _thrp_suspend() for why.
1832 	 */
1833 	spin_lock_set(&ulwp->ul_spinlock);
1834 	(void) ___lwp_suspend(tid);
1835 	spin_lock_clear(&ulwp->ul_spinlock);
1836 
1837 top:
1838 	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
1839 	    ulwp->ul_stopping) {
1840 		/* thread is already safe */
1841 		ulwp->ul_stop |= whystopped;
1842 	} else {
1843 		/*
1844 		 * Setting ul_pleasestop causes the target thread to stop
1845 		 * itself in _thrp_suspend(), below, after we drop its lock.
1846 		 * We must continue the critical thread before dropping
1847 		 * link_lock because the critical thread may be holding
1848 		 * the queue lock for link_lock.  This is delicate.
1849 		 */
1850 		ulwp->ul_pleasestop |= whystopped;
1851 		force_continue(ulwp);
1852 		if (link_dropped != NULL) {
1853 			*link_dropped = 1;
1854 			lmutex_unlock(&udp->link_lock);
1855 			/* be sure to drop link_lock only once */
1856 			link_dropped = NULL;
1857 		}
1858 
1859 		/*
1860 		 * The thread may disappear by calling thr_exit() so we
1861 		 * cannot rely on the ulwp pointer after dropping the lock.
1862 		 * Instead, we search the hash table to find it again.
1863 		 * When we return, we may find that the thread has been
1864 		 * continued by some other thread.  The suspend/continue
1865 		 * interfaces are prone to such race conditions by design.
1866 		 */
1867 		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
1868 		    (ulwp->ul_pleasestop & whystopped)) {
1869 			(void) __cond_wait(cvp, mp);
			/* look the target up again by lwpid in its hash bucket */
1870 			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
1871 			    ulwp != NULL; ulwp = ulwp->ul_hash) {
1872 				if (ulwp->ul_lwpid == tid)
1873 					break;
1874 			}
1875 		}
1876 
1877 		if (ulwp == NULL || ulwp->ul_dead)
1878 			error = ESRCH;
1879 		else {
1880 			/*
1881 			 * Do another lwp_suspend() to make sure we don't
1882 			 * return until the target thread is fully stopped
1883 			 * in the kernel.  Don't apply lwp_suspend() until
1884 			 * we know that the target is not holding any
1885 			 * queue locks, that is, that it has completed
1886 			 * ulwp_unlock(self) and has, or at least is
1887 			 * about to, call lwp_suspend() on itself.  We do
1888 			 * this by grabbing the target's spin lock.
1889 			 */
1890 			ASSERT(ulwp->ul_lwpid == tid);
1891 			spin_lock_set(&ulwp->ul_spinlock);
1892 			(void) ___lwp_suspend(tid);
1893 			spin_lock_clear(&ulwp->ul_spinlock);
1894 			/*
1895 			 * If some other thread did a thr_continue()
1896 			 * on the target thread we have to start over.
1897 			 */
1898 			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
1899 				goto top;
1900 		}
1901 	}
1902 
	/* wake any waiters on the target's condvar, then drop its lock */
1903 	(void) cond_broadcast(cvp);
1904 	lmutex_unlock(mp);
1905 	return (error);
1906 }
1907 
/*
 * Suspend the thread 'tid' for the reason(s) given in 'whystopped',
 * a non-empty combination of TSTP_REGULAR, TSTP_MUTATOR and TSTP_FORK
 * (asserted).
 *
 * Returns:
 *   ESRCH	find_lwp() could not find the thread, or safe_suspend()
 *		reported that it exited before it could be suspended.
 *   EINVAL	whystopped is TSTP_MUTATOR but the target is not marked
 *		as a mutator (ul_mutator is clear).
 *   0		otherwise.  If the target already had ul_stop set, the
 *		new reason is simply added to it.
 *
 * When the target is the calling thread, this function does not return
 * until some other thread continues it.  fork_lock is entered only
 * when suspending a thread other than ourself.
 */
1908 int
1909 _thrp_suspend(thread_t tid, uchar_t whystopped)
1910 {
1911 	ulwp_t *self = curthread;
1912 	uberdata_t *udp = self->ul_uberdata;
1913 	ulwp_t *ulwp;
1914 	int error = 0;
1915 
1916 	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
1917 	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);
1918 
1919 	/*
1920 	 * We can't suspend anyone except ourself while
1921 	 * some other thread is performing a fork.
1922 	 * This also allows only one suspension at a time.
1923 	 */
1924 	if (tid != self->ul_lwpid)
1925 		fork_lock_enter();
1926 
	/*
	 * On success find_lwp() evidently returns with the target's ulwp
	 * lock held: every branch below either calls ulwp_unlock() or
	 * hands the lock to safe_suspend(), which drops it.
	 */
1927 	if ((ulwp = find_lwp(tid)) == NULL)
1928 		error = ESRCH;
1929 	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
1930 		ulwp_unlock(ulwp, udp);
1931 		error = EINVAL;
1932 	} else if (ulwp->ul_stop) {	/* already stopped */
1933 		ulwp->ul_stop |= whystopped;
1934 		ulwp_broadcast(ulwp);
1935 		ulwp_unlock(ulwp, udp);
1936 	} else if (ulwp != self) {
1937 		/*
1938 		 * After suspending the other thread, move it out of a
1939 		 * critical section and deal with the schedctl mappings.
1940 		 * safe_suspend() suspends the other thread, calls
1941 		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
1942 		 */
1943 		error = safe_suspend(ulwp, whystopped, NULL);
1944 	} else {
1945 		int schedctl_after_fork = 0;
1946 
1947 		/*
1948 		 * We are suspending ourself.  We must not take a signal
1949 		 * until we return from lwp_suspend() and clear ul_stopping.
1950 		 * This is to guard against siglongjmp().
1951 		 */
1952 		enter_critical(self);
1953 		self->ul_sp = stkptr();
1954 		_flush_windows();	/* sparc */
1955 		self->ul_pleasestop = 0;
1956 		self->ul_stop |= whystopped;
1957 		/*
1958 		 * Grab our spin lock before dropping ulwp_mutex(self).
1959 		 * This prevents the suspending thread from applying
1960 		 * lwp_suspend() to us before we emerge from
1961 		 * lmutex_unlock(mp) and have dropped mp's queue lock.
1962 		 */
1963 		spin_lock_set(&self->ul_spinlock);
1964 		self->ul_stopping = 1;
1965 		ulwp_broadcast(self);
1966 		ulwp_unlock(self, udp);
1967 		/*
1968 		 * From this point until we return from lwp_suspend(),
1969 		 * we must not call any function that might invoke the
1970 		 * dynamic linker, that is, we can only call functions
1971 		 * private to the library.
1972 		 *
1973 		 * Also, this is a nasty race condition for a process
1974 		 * that is undergoing a forkall() operation:
1975 		 * Once we clear our spinlock (below), we are vulnerable
1976 		 * to being suspended by the forkall() thread before
1977 		 * we manage to suspend ourself in ___lwp_suspend().
1978 		 * See safe_suspend() and force_continue().
1979 		 *
1980 		 * To avoid a SIGSEGV due to the disappearance
1981 		 * of the schedctl mappings in the child process,
1982 		 * which can happen in spin_lock_clear() if we
1983 		 * are suspended while we are in the middle of
1984 		 * its call to preempt(), we preemptively clear
1985 		 * our own schedctl pointer before dropping our
1986 		 * spinlock.  We reinstate it, in both the parent
1987 		 * and (if this really is a forkall()) the child.
1988 		 */
1989 		if (whystopped & TSTP_FORK) {
1990 			schedctl_after_fork = 1;
1991 			self->ul_schedctl = NULL;
1992 			self->ul_schedctl_called = &udp->uberflags;
1993 		}
1994 		spin_lock_clear(&self->ul_spinlock);
1995 		(void) ___lwp_suspend(tid);
1996 		/*
1997 		 * Somebody else continued us.
1998 		 * We can't grab ulwp_lock(self)
1999 		 * until after clearing ul_stopping.
2000 		 * force_continue() relies on this.
2001 		 */
2002 		self->ul_stopping = 0;
2003 		self->ul_sp = 0;
2004 		if (schedctl_after_fork) {
			/* reinstate the schedctl state cleared before suspending */
2005 			self->ul_schedctl_called = NULL;
2006 			self->ul_schedctl = NULL;
2007 			(void) setup_schedctl();
2008 		}
2009 		ulwp_lock(self, udp);
2010 		ulwp_broadcast(self);
2011 		ulwp_unlock(self, udp);
2012 		exit_critical(self);
2013 	}
2014 
2015 	if (tid != self->ul_lwpid)
2016 		fork_lock_exit();
2017 
2018 	return (error);
2019 }
2020 
2021 /*
2022  * Suspend all lwps other than ourself in preparation for fork.
2023  */
2024 void
2025 suspend_fork()
2026 {
2027 	ulwp_t *self = curthread;
2028 	uberdata_t *udp = self->ul_uberdata;
2029 	ulwp_t *ulwp;
2030 	int link_dropped;
2031 
2032 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2033 top:
2034 	lmutex_lock(&udp->link_lock);
2035 
2036 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2037 		ulwp_lock(ulwp, udp);
2038 		if (ulwp->ul_stop) {	/* already stopped */
2039 			ulwp->ul_stop |= TSTP_FORK;
2040 			ulwp_broadcast(ulwp);
2041 			ulwp_unlock(ulwp, udp);
2042 		} else {
2043 			/*
2044 			 * Move the stopped lwp out of a critical section.
2045 			 */
2046 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2047 			    link_dropped)
2048 				goto top;
2049 		}
2050 	}
2051 
2052 	lmutex_unlock(&udp->link_lock);
2053 }
2054 
2055 void
2056 continue_fork(int child)
2057 {
2058 	ulwp_t *self = curthread;
2059 	uberdata_t *udp = self->ul_uberdata;
2060 	ulwp_t *ulwp;
2061 
2062 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2063 
2064 	/*
2065 	 * Clear the schedctl pointers in the child of forkall().
2066 	 */
2067 	if (child) {
2068 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2069 			ulwp->ul_schedctl_called =
2070 			    ulwp->ul_dead? &udp->uberflags : NULL;
2071 			ulwp->ul_schedctl = NULL;
2072 		}
2073 	}
2074 
2075 	/*
2076 	 * Set all lwps that were stopped for fork() running again.
2077 	 */
2078 	lmutex_lock(&udp->link_lock);
2079 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2080 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2081 		lmutex_lock(mp);
2082 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2083 		ulwp->ul_stop &= ~TSTP_FORK;
2084 		ulwp_broadcast(ulwp);
2085 		if (!ulwp->ul_stop)
2086 			force_continue(ulwp);
2087 		lmutex_unlock(mp);
2088 	}
2089 	lmutex_unlock(&udp->link_lock);
2090 }
2091 
2092 int
2093 _thrp_continue(thread_t tid, uchar_t whystopped)
2094 {
2095 	uberdata_t *udp = curthread->ul_uberdata;
2096 	ulwp_t *ulwp;
2097 	mutex_t *mp;
2098 	int error = 0;
2099 
2100 	ASSERT(whystopped == TSTP_REGULAR ||
2101 	    whystopped == TSTP_MUTATOR);
2102 
2103 	/*
2104 	 * We single-thread the entire thread suspend/continue mechanism.
2105 	 */
2106 	fork_lock_enter();
2107 
2108 	if ((ulwp = find_lwp(tid)) == NULL) {
2109 		fork_lock_exit();
2110 		return (ESRCH);
2111 	}
2112 
2113 	mp = ulwp_mutex(ulwp, udp);
2114 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2115 		error = EINVAL;
2116 	} else if (ulwp->ul_stop & whystopped) {
2117 		ulwp->ul_stop &= ~whystopped;
2118 		ulwp_broadcast(ulwp);
2119 		if (!ulwp->ul_stop) {
2120 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2121 				ulwp->ul_sp = 0;
2122 				ulwp->ul_created = 0;
2123 			}
2124 			force_continue(ulwp);
2125 		}
2126 	}
2127 	lmutex_unlock(mp);
2128 
2129 	fork_lock_exit();
2130 	return (error);
2131 }
2132 
2133 int
2134 thr_suspend(thread_t tid)
2135 {
2136 	return (_thrp_suspend(tid, TSTP_REGULAR));
2137 }
2138 
2139 int
2140 thr_continue(thread_t tid)
2141 {
2142 	return (_thrp_continue(tid, TSTP_REGULAR));
2143 }
2144 
/*
 * Thin wrapper that simply calls yield().
 */
void
thr_yield(void)
{
	yield();
}
2150 
2151 #pragma weak pthread_kill = thr_kill
2152 #pragma weak _thr_kill = thr_kill
2153 int
2154 thr_kill(thread_t tid, int sig)
2155 {
2156 	if (sig == SIGCANCEL)
2157 		return (EINVAL);
2158 	return (_lwp_kill(tid, sig));
2159 }
2160 
2161 /*
2162  * Exit a critical section, take deferred actions if necessary.
2163  * Called from exit_critical() and from sigon().
2164  */
2165 void
2166 do_exit_critical()
2167 {
2168 	ulwp_t *self = curthread;
2169 	int sig;
2170 
2171 	ASSERT(self->ul_critical == 0);
2172 
2173 	/*
2174 	 * Don't suspend ourself or take a deferred signal while dying
2175 	 * or while executing inside the dynamic linker (ld.so.1).
2176 	 */
2177 	if (self->ul_dead || self->ul_rtld)
2178 		return;
2179 
2180 	while (self->ul_pleasestop ||
2181 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2182 		/*
2183 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2184 		 * by keeping self->ul_critical == 1 here.
2185 		 */
2186 		self->ul_critical++;
2187 		while (self->ul_pleasestop) {
2188 			/*
2189 			 * Guard against suspending ourself while on a sleep
2190 			 * queue.  See the comments in call_user_handler().
2191 			 */
2192 			unsleep_self();
2193 			set_parking_flag(self, 0);
2194 			(void) _thrp_suspend(self->ul_lwpid,
2195 			    self->ul_pleasestop);
2196 		}
2197 		self->ul_critical--;
2198 
2199 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2200 			/*
2201 			 * Clear ul_cursig before proceeding.
2202 			 * This protects us from the dynamic linker's
2203 			 * calls to bind_guard()/bind_clear() in the
2204 			 * event that it is invoked to resolve a symbol
2205 			 * like take_deferred_signal() below.
2206 			 */
2207 			self->ul_cursig = 0;
2208 			take_deferred_signal(sig);
2209 			ASSERT(self->ul_cursig == 0);
2210 		}
2211 	}
2212 	ASSERT(self->ul_critical == 0);
2213 }
2214 
2215 /*
2216  * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
2217  * (ld.so.1) when it has do do something, like resolve a symbol to be called
2218  * by the application or one of its libraries.  _ti_bind_guard() is called
2219  * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2220  * application.  The dynamic linker gets special dispensation from libc to
2221  * run in a critical region (all signals deferred and no thread suspension
2222  * or forking allowed), and to be immune from cancellation for the duration.
2223  */
2224 int
2225 _ti_bind_guard(int flags)
2226 {
2227 	ulwp_t *self = curthread;
2228 	uberdata_t *udp = self->ul_uberdata;
2229 	int bindflag = (flags & THR_FLG_RTLD);
2230 
2231 	if ((self->ul_bindflags & bindflag) == bindflag)
2232 		return (0);
2233 	self->ul_bindflags |= bindflag;
2234 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2235 		sigoff(self);	/* see no signals while holding ld_lock */
2236 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2237 		(void) mutex_lock(&udp->ld_lock);
2238 	}
2239 	enter_critical(self);
2240 	self->ul_save_state = self->ul_cancel_disabled;
2241 	self->ul_cancel_disabled = 1;
2242 	set_cancel_pending_flag(self, 0);
2243 	return (1);
2244 }
2245 
2246 int
2247 _ti_bind_clear(int flags)
2248 {
2249 	ulwp_t *self = curthread;
2250 	uberdata_t *udp = self->ul_uberdata;
2251 	int bindflag = (flags & THR_FLG_RTLD);
2252 
2253 	if ((self->ul_bindflags & bindflag) == 0)
2254 		return (self->ul_bindflags);
2255 	self->ul_bindflags &= ~bindflag;
2256 	self->ul_cancel_disabled = self->ul_save_state;
2257 	set_cancel_pending_flag(self, 0);
2258 	exit_critical(self);
2259 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2260 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2261 			(void) mutex_unlock(&udp->ld_lock);
2262 			self->ul_rtld--;
2263 			sigon(self);	/* reenable signals */
2264 		}
2265 	}
2266 	return (self->ul_bindflags);
2267 }
2268 
2269 /*
2270  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2271  * a critical region in libc.  Return zero if not, else return non-zero.
2272  */
2273 int
2274 _ti_critical(void)
2275 {
2276 	ulwp_t *self = curthread;
2277 	int level = self->ul_critical;
2278 
2279 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2280 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2281 	return (level - 1);
2282 }
2283 
2284 /*
2285  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2286  * it does in the old libthread (see the comments in cond_wait_queue()).
2287  * Also, signals are deferred at thread startup until TLS constructors
2288  * have all been called, at which time _thrp_setup() calls sigon().
2289  *
2290  * _sigoff() and _sigon() are external consolidation-private interfaces to
2291  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2292  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2293  * (librtc.so) to defer signals during its critical sections (not to be
2294  * confused with libc critical sections [see exit_critical() above]).
2295  */
2296 void
2297 _sigoff(void)
2298 {
2299 	ulwp_t *self = curthread;
2300 
2301 	sigoff(self);
2302 }
2303 
2304 void
2305 _sigon(void)
2306 {
2307 	ulwp_t *self = curthread;
2308 
2309 	ASSERT(self->ul_sigdefer > 0);
2310 	sigon(self);
2311 }
2312 
2313 int
2314 thr_getconcurrency()
2315 {
2316 	return (thr_concurrency);
2317 }
2318 
2319 int
2320 pthread_getconcurrency()
2321 {
2322 	return (pthread_concurrency);
2323 }
2324 
2325 int
2326 thr_setconcurrency(int new_level)
2327 {
2328 	uberdata_t *udp = curthread->ul_uberdata;
2329 
2330 	if (new_level < 0)
2331 		return (EINVAL);
2332 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2333 		return (EAGAIN);
2334 	lmutex_lock(&udp->link_lock);
2335 	if (new_level > thr_concurrency)
2336 		thr_concurrency = new_level;
2337 	lmutex_unlock(&udp->link_lock);
2338 	return (0);
2339 }
2340 
2341 int
2342 pthread_setconcurrency(int new_level)
2343 {
2344 	if (new_level < 0)
2345 		return (EINVAL);
2346 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2347 		return (EAGAIN);
2348 	pthread_concurrency = new_level;
2349 	return (0);
2350 }
2351 
2352 size_t
2353 thr_min_stack(void)
2354 {
2355 	return (MINSTACK);
2356 }
2357 
2358 int
2359 __nthreads(void)
2360 {
2361 	return (curthread->ul_uberdata->nthreads);
2362 }
2363 
2364 /*
2365  * XXX
2366  * The remainder of this file implements the private interfaces to java for
2367  * garbage collection.  It is no longer used, at least by java 1.2.
2368  * It can all go away once all old JVMs have disappeared.
2369  */
2370 
2371 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2372 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2373 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2374 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2375 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2376 
2377 /*
2378  * Get the available register state for the target thread.
2379  * Return non-volatile registers: TRS_NONVOLATILE
2380  */
2381 #pragma weak _thr_getstate = thr_getstate
2382 int
2383 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2384 {
2385 	ulwp_t *self = curthread;
2386 	uberdata_t *udp = self->ul_uberdata;
2387 	ulwp_t **ulwpp;
2388 	ulwp_t *ulwp;
2389 	int error = 0;
2390 	int trs_flag = TRS_LWPID;
2391 
2392 	if (tid == 0 || self->ul_lwpid == tid) {
2393 		ulwp = self;
2394 		ulwp_lock(ulwp, udp);
2395 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2396 		ulwp = *ulwpp;
2397 	} else {
2398 		if (flag)
2399 			*flag = TRS_INVALID;
2400 		return (ESRCH);
2401 	}
2402 
2403 	if (ulwp->ul_dead) {
2404 		trs_flag = TRS_INVALID;
2405 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2406 		error = EINVAL;
2407 		trs_flag = TRS_INVALID;
2408 	} else if (ulwp->ul_stop) {
2409 		trs_flag = TRS_NONVOLATILE;
2410 		getgregs(ulwp, rs);
2411 	}
2412 
2413 	if (flag)
2414 		*flag = trs_flag;
2415 	if (lwp)
2416 		*lwp = tid;
2417 	if (ss != NULL)
2418 		(void) _thrp_stksegment(ulwp, ss);
2419 
2420 	ulwp_unlock(ulwp, udp);
2421 	return (error);
2422 }
2423 
2424 /*
2425  * Set the appropriate register state for the target thread.
2426  * This is not used by java.  It exists solely for the MSTC test suite.
2427  */
2428 #pragma weak _thr_setstate = thr_setstate
2429 int
2430 thr_setstate(thread_t tid, int flag, gregset_t rs)
2431 {
2432 	uberdata_t *udp = curthread->ul_uberdata;
2433 	ulwp_t *ulwp;
2434 	int error = 0;
2435 
2436 	if ((ulwp = find_lwp(tid)) == NULL)
2437 		return (ESRCH);
2438 
2439 	if (!ulwp->ul_stop && !suspendedallmutators)
2440 		error = EINVAL;
2441 	else if (rs != NULL) {
2442 		switch (flag) {
2443 		case TRS_NONVOLATILE:
2444 			/* do /proc stuff here? */
2445 			if (ulwp->ul_stop)
2446 				setgregs(ulwp, rs);
2447 			else
2448 				error = EINVAL;
2449 			break;
2450 		case TRS_LWPID:		/* do /proc stuff here? */
2451 		default:
2452 			error = EINVAL;
2453 			break;
2454 		}
2455 	}
2456 
2457 	ulwp_unlock(ulwp, udp);
2458 	return (error);
2459 }
2460 
2461 int
2462 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2463 {
2464 	extern ssize_t __pread(int, void *, size_t, off_t);
2465 	char buf[100];
2466 	int fd;
2467 
2468 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2469 	(void) strcpy(buf, "/proc/self/lwp/");
2470 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2471 	(void) strcat(buf, "/lwpstatus");
2472 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2473 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2474 			if (sp->pr_flags & PR_STOPPED) {
2475 				(void) __close(fd);
2476 				return (0);
2477 			}
2478 			yield();	/* give him a chance to stop */
2479 		}
2480 		(void) __close(fd);
2481 	}
2482 	return (-1);
2483 }
2484 
2485 int
2486 putlwpregs(thread_t tid, prgregset_t prp)
2487 {
2488 	extern ssize_t __writev(int, const struct iovec *, int);
2489 	char buf[100];
2490 	int fd;
2491 	long dstop_sreg[2];
2492 	long run_null[2];
2493 	iovec_t iov[3];
2494 
2495 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2496 	(void) strcpy(buf, "/proc/self/lwp/");
2497 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2498 	(void) strcat(buf, "/lwpctl");
2499 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2500 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2501 		dstop_sreg[1] = PCSREG;		/* set the registers */
2502 		iov[0].iov_base = (caddr_t)dstop_sreg;
2503 		iov[0].iov_len = sizeof (dstop_sreg);
2504 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2505 		iov[1].iov_len = sizeof (prgregset_t);
2506 		run_null[0] = PCRUN;		/* make it runnable again */
2507 		run_null[1] = 0;
2508 		iov[2].iov_base = (caddr_t)run_null;
2509 		iov[2].iov_len = sizeof (run_null);
2510 		if (__writev(fd, iov, 3) >= 0) {
2511 			(void) __close(fd);
2512 			return (0);
2513 		}
2514 		(void) __close(fd);
2515 	}
2516 	return (-1);
2517 }
2518 
2519 static ulong_t
2520 gettsp_slow(thread_t tid)
2521 {
2522 	char buf[100];
2523 	struct lwpstatus status;
2524 
2525 	if (getlwpstatus(tid, &status) != 0) {
2526 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2527 		(void) strcpy(buf, "__gettsp(");
2528 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2529 		(void) strcat(buf, "): can't read lwpstatus");
2530 		thr_panic(buf);
2531 	}
2532 	return (status.pr_reg[R_SP]);
2533 }
2534 
2535 ulong_t
2536 __gettsp(thread_t tid)
2537 {
2538 	uberdata_t *udp = curthread->ul_uberdata;
2539 	ulwp_t *ulwp;
2540 	ulong_t result;
2541 
2542 	if ((ulwp = find_lwp(tid)) == NULL)
2543 		return (0);
2544 
2545 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2546 		ulwp_unlock(ulwp, udp);
2547 		return (result);
2548 	}
2549 
2550 	result = gettsp_slow(tid);
2551 	ulwp_unlock(ulwp, udp);
2552 	return (result);
2553 }
2554 
2555 /*
2556  * This tells java stack walkers how to find the ucontext
2557  * structure passed to signal handlers.
2558  */
2559 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2560 void
2561 thr_sighndlrinfo(void (**func)(), int *funcsize)
2562 {
2563 	*func = &__sighndlr;
2564 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2565 }
2566 
2567 /*
2568  * Mark a thread a mutator or reset a mutator to being a default,
2569  * non-mutator thread.
2570  */
2571 #pragma weak _thr_setmutator = thr_setmutator
2572 int
2573 thr_setmutator(thread_t tid, int enabled)
2574 {
2575 	ulwp_t *self = curthread;
2576 	uberdata_t *udp = self->ul_uberdata;
2577 	ulwp_t *ulwp;
2578 	int error;
2579 	int cancel_state;
2580 
2581 	enabled = enabled? 1 : 0;
2582 top:
2583 	if (tid == 0) {
2584 		ulwp = self;
2585 		ulwp_lock(ulwp, udp);
2586 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2587 		return (ESRCH);
2588 	}
2589 
2590 	/*
2591 	 * The target thread should be the caller itself or a suspended thread.
2592 	 * This prevents the target from also changing its ul_mutator field.
2593 	 */
2594 	error = 0;
2595 	if (ulwp != self && !ulwp->ul_stop && enabled)
2596 		error = EINVAL;
2597 	else if (ulwp->ul_mutator != enabled) {
2598 		lmutex_lock(&mutatorslock);
2599 		if (mutatorsbarrier) {
2600 			ulwp_unlock(ulwp, udp);
2601 			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2602 			    &cancel_state);
2603 			while (mutatorsbarrier)
2604 				(void) cond_wait(&mutatorscv, &mutatorslock);
2605 			(void) pthread_setcancelstate(cancel_state, NULL);
2606 			lmutex_unlock(&mutatorslock);
2607 			goto top;
2608 		}
2609 		ulwp->ul_mutator = enabled;
2610 		lmutex_unlock(&mutatorslock);
2611 	}
2612 
2613 	ulwp_unlock(ulwp, udp);
2614 	return (error);
2615 }
2616 
2617 /*
2618  * Establish a barrier against new mutators.  Any non-mutator trying
2619  * to become a mutator is suspended until the barrier is removed.
2620  */
2621 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2622 void
2623 thr_mutators_barrier(int enabled)
2624 {
2625 	int oldvalue;
2626 	int cancel_state;
2627 
2628 	lmutex_lock(&mutatorslock);
2629 
2630 	/*
2631 	 * Wait if trying to set the barrier while it is already set.
2632 	 */
2633 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2634 	while (mutatorsbarrier && enabled)
2635 		(void) cond_wait(&mutatorscv, &mutatorslock);
2636 	(void) pthread_setcancelstate(cancel_state, NULL);
2637 
2638 	oldvalue = mutatorsbarrier;
2639 	mutatorsbarrier = enabled;
2640 	/*
2641 	 * Wakeup any blocked non-mutators when barrier is removed.
2642 	 */
2643 	if (oldvalue && !enabled)
2644 		(void) cond_broadcast(&mutatorscv);
2645 	lmutex_unlock(&mutatorslock);
2646 }
2647 
2648 /*
2649  * Suspend the set of all mutators except for the caller.  The list
2650  * of actively running threads is searched and only the mutators
2651  * in this list are suspended.  Actively running non-mutators remain
2652  * running.  Any other thread is suspended.
2653  */
2654 #pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2655 int
2656 thr_suspend_allmutators(void)
2657 {
2658 	ulwp_t *self = curthread;
2659 	uberdata_t *udp = self->ul_uberdata;
2660 	ulwp_t *ulwp;
2661 	int link_dropped;
2662 
2663 	/*
2664 	 * We single-thread the entire thread suspend/continue mechanism.
2665 	 */
2666 	fork_lock_enter();
2667 
2668 top:
2669 	lmutex_lock(&udp->link_lock);
2670 
2671 	if (suspendingallmutators || suspendedallmutators) {
2672 		lmutex_unlock(&udp->link_lock);
2673 		fork_lock_exit();
2674 		return (EINVAL);
2675 	}
2676 	suspendingallmutators = 1;
2677 
2678 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2679 		ulwp_lock(ulwp, udp);
2680 		if (!ulwp->ul_mutator) {
2681 			ulwp_unlock(ulwp, udp);
2682 		} else if (ulwp->ul_stop) {	/* already stopped */
2683 			ulwp->ul_stop |= TSTP_MUTATOR;
2684 			ulwp_broadcast(ulwp);
2685 			ulwp_unlock(ulwp, udp);
2686 		} else {
2687 			/*
2688 			 * Move the stopped lwp out of a critical section.
2689 			 */
2690 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2691 			    link_dropped) {
2692 				suspendingallmutators = 0;
2693 				goto top;
2694 			}
2695 		}
2696 	}
2697 
2698 	suspendedallmutators = 1;
2699 	suspendingallmutators = 0;
2700 	lmutex_unlock(&udp->link_lock);
2701 	fork_lock_exit();
2702 	return (0);
2703 }
2704 
2705 /*
2706  * Suspend the target mutator.  The caller is permitted to suspend
2707  * itself.  If a mutator barrier is enabled, the caller will suspend
2708  * itself as though it had been suspended by thr_suspend_allmutators().
2709  * When the barrier is removed, this thread will be resumed.  Any
2710  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2711  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2712  */
2713 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2714 int
2715 thr_suspend_mutator(thread_t tid)
2716 {
2717 	if (tid == 0)
2718 		tid = curthread->ul_lwpid;
2719 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2720 }
2721 
2722 /*
2723  * Resume the set of all suspended mutators.
2724  */
2725 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2726 int
2727 thr_continue_allmutators()
2728 {
2729 	ulwp_t *self = curthread;
2730 	uberdata_t *udp = self->ul_uberdata;
2731 	ulwp_t *ulwp;
2732 
2733 	/*
2734 	 * We single-thread the entire thread suspend/continue mechanism.
2735 	 */
2736 	fork_lock_enter();
2737 
2738 	lmutex_lock(&udp->link_lock);
2739 	if (!suspendedallmutators) {
2740 		lmutex_unlock(&udp->link_lock);
2741 		fork_lock_exit();
2742 		return (EINVAL);
2743 	}
2744 	suspendedallmutators = 0;
2745 
2746 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2747 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2748 		lmutex_lock(mp);
2749 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2750 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2751 			ulwp_broadcast(ulwp);
2752 			if (!ulwp->ul_stop)
2753 				force_continue(ulwp);
2754 		}
2755 		lmutex_unlock(mp);
2756 	}
2757 
2758 	lmutex_unlock(&udp->link_lock);
2759 	fork_lock_exit();
2760 	return (0);
2761 }
2762 
2763 /*
2764  * Resume a suspended mutator.
2765  */
2766 #pragma weak _thr_continue_mutator = thr_continue_mutator
2767 int
2768 thr_continue_mutator(thread_t tid)
2769 {
2770 	return (_thrp_continue(tid, TSTP_MUTATOR));
2771 }
2772 
2773 #pragma weak _thr_wait_mutator = thr_wait_mutator
2774 int
2775 thr_wait_mutator(thread_t tid, int dontwait)
2776 {
2777 	uberdata_t *udp = curthread->ul_uberdata;
2778 	ulwp_t *ulwp;
2779 	int cancel_state;
2780 	int error = 0;
2781 
2782 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2783 top:
2784 	if ((ulwp = find_lwp(tid)) == NULL) {
2785 		(void) pthread_setcancelstate(cancel_state, NULL);
2786 		return (ESRCH);
2787 	}
2788 
2789 	if (!ulwp->ul_mutator)
2790 		error = EINVAL;
2791 	else if (dontwait) {
2792 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2793 			error = EWOULDBLOCK;
2794 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2795 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2796 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2797 
2798 		(void) cond_wait(cvp, mp);
2799 		(void) lmutex_unlock(mp);
2800 		goto top;
2801 	}
2802 
2803 	ulwp_unlock(ulwp, udp);
2804 	(void) pthread_setcancelstate(cancel_state, NULL);
2805 	return (error);
2806 }
2807 
2808 /* PROBE_SUPPORT begin */
2809 
2810 void
2811 thr_probe_setup(void *data)
2812 {
2813 	curthread->ul_tpdp = data;
2814 }
2815 
2816 static void *
2817 _thread_probe_getfunc()
2818 {
2819 	return (curthread->ul_tpdp);
2820 }
2821 
2822 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2823 
2824 /* ARGSUSED */
2825 void
2826 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2827 {
2828 	/* never called */
2829 }
2830 
2831 /* ARGSUSED */
2832 void
2833 _resume_ret(ulwp_t *oldlwp)
2834 {
2835 	/* never called */
2836 }
2837 
2838 /* PROBE_SUPPORT end */
2839