xref: /titanic_52/usr/src/lib/libc/port/threads/thr.c (revision dcdfe824b3dff2df12578b936adf1daf000aa129)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2017 by The MathWorks, Inc. All rights reserved.
25  */
26 /*
27  * Copyright 2016 Joyent, Inc.
28  */
29 
30 #include "lint.h"
31 #include "thr_uberdata.h"
32 #include <pthread.h>
33 #include <procfs.h>
34 #include <sys/uio.h>
35 #include <ctype.h>
36 #include "libc.h"
37 
38 /*
39  * These symbols should not be exported from libc, but
40  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
41  * Also, some older versions of the Studio compiler/debugger
42  * components reference them.  These need to be fixed, too.
43  */
44 #pragma weak _thr_main = thr_main
45 #pragma weak _thr_create = thr_create
46 #pragma weak _thr_join = thr_join
47 #pragma weak _thr_self = thr_self
48 
49 #undef errno
50 extern int errno;
51 
52 /*
53  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
54  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
55  * system used it illegally (it is a consolidation private symbol).
56  * To accommodate this and possibly other abusers of the symbol,
57  * we make it always equal to 1 now that libthread has been folded
58  * into libc.  The new __libc_threaded symbol is used to indicate
59  * the new meaning, "more than one thread exists".
60  */
61 int __threaded = 1;		/* always equal to 1 */
62 int __libc_threaded = 0;	/* zero until first thr_create() */
63 
64 /*
65  * thr_concurrency and pthread_concurrency are not used by the library.
66  * They exist solely to hold and return the values set by calls to
67  * thr_setconcurrency() and pthread_setconcurrency().
68  * Because thr_concurrency is affected by the THR_NEW_LWP flag
69  * to thr_create(), thr_concurrency is protected by link_lock.
70  */
71 static	int	thr_concurrency = 1;
72 static	int	pthread_concurrency;
73 
#define	HASHTBLSZ	1024	/* must be a power of two */
/*
 * Map a thread id to a bucket index in udp->thr_hash_table by masking
 * it with the table's hash_mask.  Both macro arguments are parenthesized
 * so that an expression argument (e.g. "a | b") is masked as a whole
 * rather than being split by operator precedence.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
76 
77 /* initial allocation, just enough for one lwp */
78 #pragma align 64(init_hash_table)
79 thr_hash_table_t init_hash_table[1] = {
80 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
81 };
82 
83 extern const Lc_interface rtld_funcs[];
84 
/*
 * The process-wide uberdata_t instance and its static initial state.
 * The weak version is known to libc_db and mdb.
 *
 * The initializer is positional: each entry is labelled with the member
 * it is meant to initialize, so the entries must be kept in the same
 * order as the members of uberdata_t (declared outside this file).
 *
 * The thread hash table initially points at the one-entry
 * init_hash_table with hash_size 1 and hash_mask 0; _thrp_create()
 * calls finish_init() while hash_size is still 1.
 */
#pragma weak _uberdata = __uberdata
uberdata_t __uberdata = {
	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
	{ 0, },				/* tdb_hash_lock_stats */
	{ { 0 }, },			/* siguaction[NSIG] */
	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 }},
	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
	{ RECURSIVEMUTEX, NULL },		/* quickexit_root */
	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
	0,			/* primary_map */
	0,			/* bucket_init */
	0,			/* pad[0] */
	0,			/* pad[1] */
	{ 0 },			/* uberflags */
	NULL,			/* queue_head */
	init_hash_table,	/* thr_hash_table */
	1,			/* hash_size: size of the hash table */
	0,			/* hash_mask: hash_size - 1 */
	NULL,			/* ulwp_one */
	NULL,			/* all_lwps */
	NULL,			/* all_zombies */
	0,			/* nthreads */
	0,			/* nzombies */
	0,			/* ndaemons */
	0,			/* pid */
	sigacthandler,		/* sigacthandler */
	NULL,			/* lwp_stacks */
	NULL,			/* lwp_laststack */
	0,			/* nfreestack */
	10,			/* thread_stack_cache */
	NULL,			/* ulwp_freelist */
	NULL,			/* ulwp_lastfree */
	NULL,			/* ulwp_replace_free */
	NULL,			/* ulwp_replace_last */
	NULL,			/* atforklist */
	NULL,			/* robustlocks */
	NULL,			/* robustlist */
	NULL,			/* progname */
	NULL,			/* ub_comm_page */
	NULL,			/* __tdb_bootstrap */
	{			/* tdb */
		NULL,		/* tdb_sync_addr_hash */
		0,		/* tdb_register_count */
		0,		/* tdb_hash_alloc_failed */
		NULL,		/* tdb_sync_addr_free */
		NULL,		/* tdb_sync_addr_last */
		0,		/* tdb_sync_alloc */
		{ 0, 0 },	/* tdb_ev_global_mask */
		tdb_events,	/* tdb_events array */
	},
};
154 
155 /*
156  * The weak version is known to libc_db and mdb.
157  */
158 #pragma weak _tdb_bootstrap = __tdb_bootstrap
159 uberdata_t **__tdb_bootstrap = NULL;
160 
/*
 * Global thread-library settings and their default values.
 * NOTE(review): where these are set or overridden is not visible in
 * this part of the file -- confirm before documenting further.
 * thread_queue_dump gates collect_queue_statistics() in THREAD_DEBUG
 * builds; the default of thread_stack_cache matches the initial value
 * of the thread_stack_cache member in the __uberdata initializer.
 */
int	thread_queue_fifo = 4;
int	thread_queue_dump = 0;
int	thread_cond_wait_defer = 0;
int	thread_error_detection = 0;
int	thread_async_safe = 0;
int	thread_stack_cache = 10;
int	thread_door_noreserve = 0;
int	thread_locks_misaligned = 0;
169 
170 static	ulwp_t	*ulwp_alloc(void);
171 static	void	ulwp_free(ulwp_t *);
172 
173 /*
174  * Insert the lwp into the hash table.
175  */
176 void
177 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
178 {
179 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
180 	udp->thr_hash_table[ix].hash_bucket = ulwp;
181 	ulwp->ul_ix = ix;
182 }
183 
184 void
185 hash_in(ulwp_t *ulwp, uberdata_t *udp)
186 {
187 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
188 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
189 
190 	lmutex_lock(mp);
191 	hash_in_unlocked(ulwp, ix, udp);
192 	lmutex_unlock(mp);
193 }
194 
195 /*
196  * Delete the lwp from the hash table.
197  */
198 void
199 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
200 {
201 	ulwp_t **ulwpp;
202 
203 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
204 	    ulwp != *ulwpp;
205 	    ulwpp = &(*ulwpp)->ul_hash)
206 		;
207 	*ulwpp = ulwp->ul_hash;
208 	ulwp->ul_hash = NULL;
209 	ulwp->ul_ix = -1;
210 }
211 
212 void
213 hash_out(ulwp_t *ulwp, uberdata_t *udp)
214 {
215 	int ix;
216 
217 	if ((ix = ulwp->ul_ix) >= 0) {
218 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
219 
220 		lmutex_lock(mp);
221 		hash_out_unlocked(ulwp, ix, udp);
222 		lmutex_unlock(mp);
223 	}
224 }
225 
226 /*
227  * Retain stack information for thread structures that are being recycled for
228  * new threads.  All other members of the thread structure should be zeroed.
229  */
230 static void
231 ulwp_clean(ulwp_t *ulwp)
232 {
233 	caddr_t stk = ulwp->ul_stk;
234 	size_t mapsiz = ulwp->ul_mapsiz;
235 	size_t guardsize = ulwp->ul_guardsize;
236 	uintptr_t stktop = ulwp->ul_stktop;
237 	size_t stksiz = ulwp->ul_stksiz;
238 
239 	(void) memset(ulwp, 0, sizeof (*ulwp));
240 
241 	ulwp->ul_stk = stk;
242 	ulwp->ul_mapsiz = mapsiz;
243 	ulwp->ul_guardsize = guardsize;
244 	ulwp->ul_stktop = stktop;
245 	ulwp->ul_stksiz = stksiz;
246 }
247 
248 static int stackprot;
249 
250 /*
251  * Answer the question, "Is the lwp in question really dead?"
252  * We must inquire of the operating system to be really sure
253  * because the lwp may have called lwp_exit() but it has not
254  * yet completed the exit.
255  */
256 static int
257 dead_and_buried(ulwp_t *ulwp)
258 {
259 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
260 		return (1);
261 	if (ulwp->ul_dead && ulwp->ul_detached &&
262 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
263 		ulwp->ul_lwpid = (lwpid_t)(-1);
264 		return (1);
265 	}
266 	return (0);
267 }
268 
269 /*
270  * Attempt to keep the stack cache within the specified cache limit.
271  */
272 static void
273 trim_stack_cache(int cache_limit)
274 {
275 	ulwp_t *self = curthread;
276 	uberdata_t *udp = self->ul_uberdata;
277 	ulwp_t *prev = NULL;
278 	ulwp_t **ulwpp = &udp->lwp_stacks;
279 	ulwp_t *ulwp;
280 
281 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
282 
283 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
284 		if (dead_and_buried(ulwp)) {
285 			*ulwpp = ulwp->ul_next;
286 			if (ulwp == udp->lwp_laststack)
287 				udp->lwp_laststack = prev;
288 			hash_out(ulwp, udp);
289 			udp->nfreestack--;
290 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
291 			/*
292 			 * Now put the free ulwp on the ulwp freelist.
293 			 */
294 			ulwp->ul_mapsiz = 0;
295 			ulwp->ul_next = NULL;
296 			if (udp->ulwp_freelist == NULL)
297 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
298 			else {
299 				udp->ulwp_lastfree->ul_next = ulwp;
300 				udp->ulwp_lastfree = ulwp;
301 			}
302 		} else {
303 			prev = ulwp;
304 			ulwpp = &ulwp->ul_next;
305 		}
306 	}
307 }
308 
309 /*
310  * Find an unused stack of the requested size
311  * or create a new stack of the requested size.
312  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
313  * thr_exit() stores 1 in the ul_dead member.
314  * thr_join() stores -1 in the ul_lwpid member.
315  */
316 static ulwp_t *
317 find_stack(size_t stksize, size_t guardsize)
318 {
319 	static size_t pagesize = 0;
320 
321 	uberdata_t *udp = curthread->ul_uberdata;
322 	size_t mapsize;
323 	ulwp_t *prev;
324 	ulwp_t *ulwp;
325 	ulwp_t **ulwpp;
326 	void *stk;
327 
328 	/*
329 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
330 	 * unless overridden by the system's configuration.
331 	 */
332 	if (stackprot == 0) {	/* do this once */
333 		long lprot = _sysconf(_SC_STACK_PROT);
334 		if (lprot <= 0)
335 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
336 		stackprot = (int)lprot;
337 	}
338 	if (pagesize == 0)	/* do this once */
339 		pagesize = _sysconf(_SC_PAGESIZE);
340 
341 	/*
342 	 * One megabyte stacks by default, but subtract off
343 	 * two pages for the system-created red zones.
344 	 * Round up a non-zero stack size to a pagesize multiple.
345 	 */
346 	if (stksize == 0)
347 		stksize = DEFAULTSTACK - 2 * pagesize;
348 	else
349 		stksize = ((stksize + pagesize - 1) & -pagesize);
350 
351 	/*
352 	 * Round up the mapping size to a multiple of pagesize.
353 	 * Note: mmap() provides at least one page of red zone
354 	 * so we deduct that from the value of guardsize.
355 	 */
356 	if (guardsize != 0)
357 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
358 	mapsize = stksize + guardsize;
359 
360 	lmutex_lock(&udp->link_lock);
361 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
362 	    (ulwp = *ulwpp) != NULL;
363 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
364 		if (ulwp->ul_mapsiz == mapsize &&
365 		    ulwp->ul_guardsize == guardsize &&
366 		    dead_and_buried(ulwp)) {
367 			/*
368 			 * The previous lwp is gone; reuse the stack.
369 			 * Remove the ulwp from the stack list.
370 			 */
371 			*ulwpp = ulwp->ul_next;
372 			ulwp->ul_next = NULL;
373 			if (ulwp == udp->lwp_laststack)
374 				udp->lwp_laststack = prev;
375 			hash_out(ulwp, udp);
376 			udp->nfreestack--;
377 			lmutex_unlock(&udp->link_lock);
378 			ulwp_clean(ulwp);
379 			return (ulwp);
380 		}
381 	}
382 
383 	/*
384 	 * None of the cached stacks matched our mapping size.
385 	 * Reduce the stack cache to get rid of possibly
386 	 * very old stacks that will never be reused.
387 	 */
388 	if (udp->nfreestack > udp->thread_stack_cache)
389 		trim_stack_cache(udp->thread_stack_cache);
390 	else if (udp->nfreestack > 0)
391 		trim_stack_cache(udp->nfreestack - 1);
392 	lmutex_unlock(&udp->link_lock);
393 
394 	/*
395 	 * Create a new stack.
396 	 */
397 	if ((stk = mmap(NULL, mapsize, stackprot,
398 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
399 		/*
400 		 * We have allocated our stack.  Now allocate the ulwp.
401 		 */
402 		ulwp = ulwp_alloc();
403 		if (ulwp == NULL)
404 			(void) munmap(stk, mapsize);
405 		else {
406 			ulwp->ul_stk = stk;
407 			ulwp->ul_mapsiz = mapsize;
408 			ulwp->ul_guardsize = guardsize;
409 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
410 			ulwp->ul_stksiz = stksize;
411 			if (guardsize)	/* protect the extra red zone */
412 				(void) mprotect(stk, guardsize, PROT_NONE);
413 		}
414 	}
415 	return (ulwp);
416 }
417 
418 /*
419  * Get a ulwp_t structure from the free list or allocate a new one.
420  * Such ulwp_t's do not have a stack allocated by the library.
421  */
422 static ulwp_t *
423 ulwp_alloc(void)
424 {
425 	ulwp_t *self = curthread;
426 	uberdata_t *udp = self->ul_uberdata;
427 	size_t tls_size;
428 	ulwp_t *prev;
429 	ulwp_t *ulwp;
430 	ulwp_t **ulwpp;
431 	caddr_t data;
432 
433 	lmutex_lock(&udp->link_lock);
434 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
435 	    (ulwp = *ulwpp) != NULL;
436 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
437 		if (dead_and_buried(ulwp)) {
438 			*ulwpp = ulwp->ul_next;
439 			ulwp->ul_next = NULL;
440 			if (ulwp == udp->ulwp_lastfree)
441 				udp->ulwp_lastfree = prev;
442 			hash_out(ulwp, udp);
443 			lmutex_unlock(&udp->link_lock);
444 			ulwp_clean(ulwp);
445 			return (ulwp);
446 		}
447 	}
448 	lmutex_unlock(&udp->link_lock);
449 
450 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
451 	data = lmalloc(sizeof (*ulwp) + tls_size);
452 	if (data != NULL) {
453 		/* LINTED pointer cast may result in improper alignment */
454 		ulwp = (ulwp_t *)(data + tls_size);
455 	}
456 	return (ulwp);
457 }
458 
459 /*
460  * Free a ulwp structure.
461  * If there is an associated stack, put it on the stack list and
462  * munmap() previously freed stacks up to the residual cache limit.
463  * Else put it on the ulwp free list and never call lfree() on it.
464  */
465 static void
466 ulwp_free(ulwp_t *ulwp)
467 {
468 	uberdata_t *udp = curthread->ul_uberdata;
469 
470 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
471 	ulwp->ul_next = NULL;
472 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
473 		/*EMPTY*/;
474 	else if (ulwp->ul_mapsiz != 0) {
475 		if (udp->lwp_stacks == NULL)
476 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
477 		else {
478 			udp->lwp_laststack->ul_next = ulwp;
479 			udp->lwp_laststack = ulwp;
480 		}
481 		if (++udp->nfreestack > udp->thread_stack_cache)
482 			trim_stack_cache(udp->thread_stack_cache);
483 	} else {
484 		if (udp->ulwp_freelist == NULL)
485 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
486 		else {
487 			udp->ulwp_lastfree->ul_next = ulwp;
488 			udp->ulwp_lastfree = ulwp;
489 		}
490 	}
491 }
492 
493 /*
494  * Find a named lwp and return a pointer to its hash list location.
495  * On success, returns with the hash lock held.
496  */
497 ulwp_t **
498 find_lwpp(thread_t tid)
499 {
500 	uberdata_t *udp = curthread->ul_uberdata;
501 	int ix = TIDHASH(tid, udp);
502 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
503 	ulwp_t *ulwp;
504 	ulwp_t **ulwpp;
505 
506 	if (tid == 0)
507 		return (NULL);
508 
509 	lmutex_lock(mp);
510 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
511 	    (ulwp = *ulwpp) != NULL;
512 	    ulwpp = &ulwp->ul_hash) {
513 		if (ulwp->ul_lwpid == tid)
514 			return (ulwpp);
515 	}
516 	lmutex_unlock(mp);
517 	return (NULL);
518 }
519 
520 /*
521  * Wake up all lwps waiting on this lwp for some reason.
522  */
523 void
524 ulwp_broadcast(ulwp_t *ulwp)
525 {
526 	ulwp_t *self = curthread;
527 	uberdata_t *udp = self->ul_uberdata;
528 
529 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
530 	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
531 }
532 
533 /*
534  * Find a named lwp and return a pointer to it.
535  * Returns with the hash lock held.
536  */
537 ulwp_t *
538 find_lwp(thread_t tid)
539 {
540 	ulwp_t *self = curthread;
541 	uberdata_t *udp = self->ul_uberdata;
542 	ulwp_t *ulwp = NULL;
543 	ulwp_t **ulwpp;
544 
545 	if (self->ul_lwpid == tid) {
546 		ulwp = self;
547 		ulwp_lock(ulwp, udp);
548 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
549 		ulwp = *ulwpp;
550 	}
551 
552 	if (ulwp && ulwp->ul_dead) {
553 		ulwp_unlock(ulwp, udp);
554 		ulwp = NULL;
555 	}
556 
557 	return (ulwp);
558 }
559 
560 int
561 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
562     long flags, thread_t *new_thread, size_t guardsize)
563 {
564 	ulwp_t *self = curthread;
565 	uberdata_t *udp = self->ul_uberdata;
566 	ucontext_t uc;
567 	uint_t lwp_flags;
568 	thread_t tid;
569 	int error;
570 	ulwp_t *ulwp;
571 
572 	/*
573 	 * Enforce the restriction of not creating any threads
574 	 * until the primary link map has been initialized.
575 	 * Also, disallow thread creation to a child of vfork().
576 	 */
577 	if (!self->ul_primarymap || self->ul_vfork)
578 		return (ENOTSUP);
579 
580 	if (udp->hash_size == 1)
581 		finish_init();
582 
583 	if ((stk || stksize) && stksize < MINSTACK)
584 		return (EINVAL);
585 
586 	if (stk == NULL) {
587 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
588 			return (ENOMEM);
589 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
590 	} else {
591 		/* initialize the private stack */
592 		if ((ulwp = ulwp_alloc()) == NULL)
593 			return (ENOMEM);
594 		ulwp->ul_stk = stk;
595 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
596 		ulwp->ul_stksiz = stksize;
597 	}
598 	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
599 	ulwp->ul_ix = -1;
600 	ulwp->ul_errnop = &ulwp->ul_errno;
601 
602 	lwp_flags = LWP_SUSPENDED;
603 	if (flags & (THR_DETACHED|THR_DAEMON)) {
604 		flags |= THR_DETACHED;
605 		lwp_flags |= LWP_DETACHED;
606 	}
607 	if (flags & THR_DAEMON)
608 		lwp_flags |= LWP_DAEMON;
609 
610 	/* creating a thread: enforce mt-correctness in mutex_lock() */
611 	self->ul_async_safe = 1;
612 
613 	/* per-thread copies of global variables, for speed */
614 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
615 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
616 	ulwp->ul_error_detection = self->ul_error_detection;
617 	ulwp->ul_async_safe = self->ul_async_safe;
618 	ulwp->ul_max_spinners = self->ul_max_spinners;
619 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
620 	ulwp->ul_queue_spin = self->ul_queue_spin;
621 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
622 	ulwp->ul_misaligned = self->ul_misaligned;
623 
624 	/* new thread inherits creating thread's scheduling parameters */
625 	ulwp->ul_policy = self->ul_policy;
626 	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
627 	ulwp->ul_cid = self->ul_cid;
628 	ulwp->ul_rtclassid = self->ul_rtclassid;
629 
630 	ulwp->ul_primarymap = self->ul_primarymap;
631 	ulwp->ul_self = ulwp;
632 	ulwp->ul_uberdata = udp;
633 
634 	/* debugger support */
635 	ulwp->ul_usropts = flags;
636 
637 #ifdef __sparc
638 	/*
639 	 * We cache several instructions in the thread structure for use
640 	 * by the fasttrap DTrace provider. When changing this, read the
641 	 * comment in fasttrap.h for the all the other places that must
642 	 * be changed.
643 	 */
644 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
645 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
646 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
647 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
648 #endif
649 
650 	ulwp->ul_startpc = func;
651 	ulwp->ul_startarg = arg;
652 	_fpinherit(ulwp);
653 	/*
654 	 * Defer signals on the new thread until its TLS constructors
655 	 * have been called.  _thrp_setup() will call sigon() after
656 	 * it has called tls_setup().
657 	 */
658 	ulwp->ul_sigdefer = 1;
659 
660 	error = setup_context(&uc, _thrp_setup, ulwp,
661 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
662 	if (error != 0 && stk != NULL)	/* inaccessible stack */
663 		error = EFAULT;
664 
665 	/*
666 	 * Call enter_critical() to avoid being suspended until we
667 	 * have linked the new thread into the proper lists.
668 	 * This is necessary because forkall() and fork1() must
669 	 * suspend all threads and they must see a complete list.
670 	 */
671 	enter_critical(self);
672 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
673 	if (error != 0 ||
674 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
675 		exit_critical(self);
676 		ulwp->ul_lwpid = (lwpid_t)(-1);
677 		ulwp->ul_dead = 1;
678 		ulwp->ul_detached = 1;
679 		lmutex_lock(&udp->link_lock);
680 		ulwp_free(ulwp);
681 		lmutex_unlock(&udp->link_lock);
682 		return (error);
683 	}
684 	self->ul_nocancel = 0;	/* cancellation is now possible */
685 	udp->uberflags.uf_mt = 1;
686 	if (new_thread)
687 		*new_thread = tid;
688 	if (flags & THR_DETACHED)
689 		ulwp->ul_detached = 1;
690 	ulwp->ul_lwpid = tid;
691 	ulwp->ul_stop = TSTP_REGULAR;
692 	if (flags & THR_SUSPENDED)
693 		ulwp->ul_created = 1;
694 
695 	lmutex_lock(&udp->link_lock);
696 	ulwp->ul_forw = udp->all_lwps;
697 	ulwp->ul_back = udp->all_lwps->ul_back;
698 	ulwp->ul_back->ul_forw = ulwp;
699 	ulwp->ul_forw->ul_back = ulwp;
700 	hash_in(ulwp, udp);
701 	udp->nthreads++;
702 	if (flags & THR_DAEMON)
703 		udp->ndaemons++;
704 	if (flags & THR_NEW_LWP)
705 		thr_concurrency++;
706 	__libc_threaded = 1;		/* inform stdio */
707 	lmutex_unlock(&udp->link_lock);
708 
709 	if (__td_event_report(self, TD_CREATE, udp)) {
710 		self->ul_td_evbuf.eventnum = TD_CREATE;
711 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
712 		tdb_event(TD_CREATE, udp);
713 	}
714 
715 	exit_critical(self);
716 
717 	if (!(flags & THR_SUSPENDED))
718 		(void) _thrp_continue(tid, TSTP_REGULAR);
719 
720 	return (0);
721 }
722 
723 int
724 thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
725     long flags, thread_t *new_thread)
726 {
727 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
728 }
729 
/*
 * A special cancellation cleanup hook for DCE.
 * While cleanuphndlr is not NULL it holds a callback that
 * _thrp_exit_common() invokes, before anything else is torn down,
 * when the exiting thread is terminating as a result of cancellation.
 */
static void (*cleanuphndlr)(void) = NULL;

/*
 * _pthread_setcleanupinit: install 'func' as the cleanup hook
 * (passing NULL removes it).  Always returns 0.
 */
int
_pthread_setcleanupinit(void (*func)(void))
{
	cleanuphndlr = func;

	return (0);
}
747 
748 void
749 _thrp_exit()
750 {
751 	ulwp_t *self = curthread;
752 	uberdata_t *udp = self->ul_uberdata;
753 	ulwp_t *replace = NULL;
754 
755 	if (__td_event_report(self, TD_DEATH, udp)) {
756 		self->ul_td_evbuf.eventnum = TD_DEATH;
757 		tdb_event(TD_DEATH, udp);
758 	}
759 
760 	ASSERT(self->ul_sigdefer != 0);
761 
762 	lmutex_lock(&udp->link_lock);
763 	udp->nthreads--;
764 	if (self->ul_usropts & THR_NEW_LWP)
765 		thr_concurrency--;
766 	if (self->ul_usropts & THR_DAEMON)
767 		udp->ndaemons--;
768 	else if (udp->nthreads == udp->ndaemons) {
769 		/*
770 		 * We are the last non-daemon thread exiting.
771 		 * Exit the process.  We retain our TSD and TLS so
772 		 * that atexit() application functions can use them.
773 		 */
774 		lmutex_unlock(&udp->link_lock);
775 		exit(0);
776 		thr_panic("_thrp_exit(): exit(0) returned");
777 	}
778 	lmutex_unlock(&udp->link_lock);
779 
780 	/*
781 	 * tsd_exit() may call its destructor free(), thus depending on
782 	 * tmem, therefore tmem_exit() needs to be called after tsd_exit()
783 	 * and tls_exit().
784 	 */
785 	tsd_exit();		/* deallocate thread-specific data */
786 	tls_exit();		/* deallocate thread-local storage */
787 	tmem_exit();		/* deallocate tmem allocations */
788 	heldlock_exit();	/* deal with left-over held locks */
789 
790 	/* block all signals to finish exiting */
791 	block_all_signals(self);
792 	/* also prevent ourself from being suspended */
793 	enter_critical(self);
794 	rwl_free(self);
795 	lmutex_lock(&udp->link_lock);
796 	ulwp_free(self);
797 	(void) ulwp_lock(self, udp);
798 
799 	if (self->ul_mapsiz && !self->ul_detached) {
800 		/*
801 		 * We want to free the stack for reuse but must keep
802 		 * the ulwp_t struct for the benefit of thr_join().
803 		 * For this purpose we allocate a replacement ulwp_t.
804 		 */
805 		if ((replace = udp->ulwp_replace_free) == NULL)
806 			replace = lmalloc(REPLACEMENT_SIZE);
807 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
808 			udp->ulwp_replace_last = NULL;
809 	}
810 
811 	if (udp->all_lwps == self)
812 		udp->all_lwps = self->ul_forw;
813 	if (udp->all_lwps == self)
814 		udp->all_lwps = NULL;
815 	else {
816 		self->ul_forw->ul_back = self->ul_back;
817 		self->ul_back->ul_forw = self->ul_forw;
818 	}
819 	self->ul_forw = self->ul_back = NULL;
820 #if defined(THREAD_DEBUG)
821 	/* collect queue lock statistics before marking ourself dead */
822 	record_spin_locks(self);
823 #endif
824 	self->ul_dead = 1;
825 	self->ul_pleasestop = 0;
826 	if (replace != NULL) {
827 		int ix = self->ul_ix;		/* the hash index */
828 		(void) memcpy(replace, self, REPLACEMENT_SIZE);
829 		replace->ul_self = replace;
830 		replace->ul_next = NULL;	/* clone not on stack list */
831 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
832 		replace->ul_replace = 1;	/* requires clone to be freed */
833 		hash_out_unlocked(self, ix, udp);
834 		hash_in_unlocked(replace, ix, udp);
835 		ASSERT(!(self->ul_detached));
836 		self->ul_detached = 1;		/* this frees the stack */
837 		self->ul_schedctl = NULL;
838 		self->ul_schedctl_called = &udp->uberflags;
839 		set_curthread(self = replace);
840 		/*
841 		 * Having just changed the address of curthread, we
842 		 * must reset the ownership of the locks we hold so
843 		 * that assertions will not fire when we release them.
844 		 */
845 		udp->link_lock.mutex_owner = (uintptr_t)self;
846 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
847 		/*
848 		 * NOTE:
849 		 * On i386, %gs still references the original, not the
850 		 * replacement, ulwp structure.  Fetching the replacement
851 		 * curthread pointer via %gs:0 works correctly since the
852 		 * original ulwp structure will not be reallocated until
853 		 * this lwp has completed its lwp_exit() system call (see
854 		 * dead_and_buried()), but from here on out, we must make
855 		 * no references to %gs:<offset> other than %gs:0.
856 		 */
857 	}
858 	/*
859 	 * Put non-detached terminated threads in the all_zombies list.
860 	 */
861 	if (!self->ul_detached) {
862 		udp->nzombies++;
863 		if (udp->all_zombies == NULL) {
864 			ASSERT(udp->nzombies == 1);
865 			udp->all_zombies = self->ul_forw = self->ul_back = self;
866 		} else {
867 			self->ul_forw = udp->all_zombies;
868 			self->ul_back = udp->all_zombies->ul_back;
869 			self->ul_back->ul_forw = self;
870 			self->ul_forw->ul_back = self;
871 		}
872 	}
873 	/*
874 	 * Notify everyone waiting for this thread.
875 	 */
876 	ulwp_broadcast(self);
877 	(void) ulwp_unlock(self, udp);
878 	/*
879 	 * Prevent any more references to the schedctl data.
880 	 * We are exiting and continue_fork() may not find us.
881 	 * Do this just before dropping link_lock, since fork
882 	 * serializes on link_lock.
883 	 */
884 	self->ul_schedctl = NULL;
885 	self->ul_schedctl_called = &udp->uberflags;
886 	lmutex_unlock(&udp->link_lock);
887 
888 	ASSERT(self->ul_critical == 1);
889 	ASSERT(self->ul_preempt == 0);
890 	_lwp_terminate();	/* never returns */
891 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
892 }
893 
#if defined(THREAD_DEBUG)
/*
 * THREAD_DEBUG builds only: if thread_queue_dump is set, call
 * record_spin_locks() on every thread in the circular all_lwps list
 * while holding link_lock.  Does nothing when thread_queue_dump is 0
 * or the list is empty.
 */
void
collect_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;

	if (thread_queue_dump) {
		lmutex_lock(&udp->link_lock);
		if ((ulwp = udp->all_lwps) != NULL) {
			/* one full trip around the circular list */
			do {
				record_spin_locks(ulwp);
			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
		}
		lmutex_unlock(&udp->link_lock);
	}
}
#endif
912 
913 static void __NORETURN
914 _thrp_exit_common(void *status, int unwind)
915 {
916 	ulwp_t *self = curthread;
917 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
918 
919 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
920 
921 	/*
922 	 * Disable cancellation and call the special DCE cancellation
923 	 * cleanup hook if it is enabled.  Do nothing else before calling
924 	 * the DCE cancellation cleanup hook; it may call longjmp() and
925 	 * never return here.
926 	 */
927 	self->ul_cancel_disabled = 1;
928 	self->ul_cancel_async = 0;
929 	self->ul_save_async = 0;
930 	self->ul_cancelable = 0;
931 	self->ul_cancel_pending = 0;
932 	set_cancel_pending_flag(self, 1);
933 	if (cancelled && cleanuphndlr != NULL)
934 		(*cleanuphndlr)();
935 
936 	/*
937 	 * Block application signals while we are exiting.
938 	 * We call out to C++, TSD, and TLS destructors while exiting
939 	 * and these are application-defined, so we cannot be assured
940 	 * that they won't reset the signal mask.  We use sigoff() to
941 	 * defer any signals that may be received as a result of this
942 	 * bad behavior.  Such signals will be lost to the process
943 	 * when the thread finishes exiting.
944 	 */
945 	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
946 	sigoff(self);
947 
948 	self->ul_rval = status;
949 
950 	/*
951 	 * If thr_exit is being called from the places where
952 	 * C++ destructors are to be called such as cancellation
953 	 * points, then set this flag. It is checked in _t_cancel()
954 	 * to decide whether _ex_unwind() is to be called or not.
955 	 */
956 	if (unwind)
957 		self->ul_unwind = 1;
958 
959 	/*
960 	 * _thrp_unwind() will eventually call _thrp_exit().
961 	 * It never returns.
962 	 */
963 	_thrp_unwind(NULL);
964 	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");
965 
966 	for (;;)	/* to shut the compiler up about __NORETURN */
967 		continue;
968 }
969 
/*
 * Called when a thread returns from its start function, with the
 * function's return value as the exit status.  We are at the top of
 * the stack, so the common exit code is told not to unwind.
 */
void
_thrp_terminate(void *status)
{
	int unwind = 0;		/* no unwinding is necessary */

	_thrp_exit_common(status, unwind);
}
979 
/*
 * Terminate the calling thread with the given exit status.
 * pthread_exit and _thr_exit are weak aliases of this function.
 * The common exit code is asked to unwind.
 */
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
void
thr_exit(void *status)
{
	int unwind = 1;		/* request unwinding */

	_thrp_exit_common(status, unwind);
}
987 
/*
 * Common back end for thr_join(), pthread_join() and pthread_detach().
 *
 * Waits for the lwp 'tid' to terminate and, if the library knows about
 * the departed thread, removes its ulwp from the thread hash table and
 * from the all_zombies list.
 *
 *	tid		id handed to lwp_wait()/__lwp_wait()
 *	departed	if non-NULL, receives the id of the lwp that was reaped
 *	status		if non-NULL, receives the departed thread's ul_rval
 *			(NULL when the lwp is unknown to the library)
 *	do_cancel	nonzero: wait with lwp_wait();
 *			zero: wait with __lwp_wait(), retrying on EINTR
 *
 * Returns 0 on success, otherwise the error returned by the wait call.
 */
int
_thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
{
	uberdata_t *udp = curthread->ul_uberdata;
	mutex_t *mp;
	void *rval;
	thread_t found;
	ulwp_t *ulwp;
	ulwp_t **ulwpp;
	int replace;
	int error;

	if (do_cancel)
		error = lwp_wait(tid, &found);
	else {
		/* keep waiting for as long as the wait is interrupted */
		while ((error = __lwp_wait(tid, &found)) == EINTR)
			;
	}
	if (error)
		return (error);

	/*
	 * We must hold link_lock to avoid a race condition with find_stack().
	 */
	lmutex_lock(&udp->link_lock);
	if ((ulwpp = find_lwpp(found)) == NULL) {
		/*
		 * lwp_wait() found an lwp that the library doesn't know
		 * about.  It must have been created with _lwp_create().
		 * Just return its lwpid; we can't know its status.
		 */
		lmutex_unlock(&udp->link_lock);
		rval = NULL;
	} else {
		/*
		 * Remove ulwp from the hash table.
		 */
		ulwp = *ulwpp;
		*ulwpp = ulwp->ul_hash;
		ulwp->ul_hash = NULL;
		/*
		 * Remove ulwp from all_zombies list.
		 * If ulwp is the list head, advance the head first; if the
		 * head is still ulwp afterwards, ulwp was the only element
		 * and the list becomes empty.
		 */
		ASSERT(udp->nzombies >= 1);
		if (udp->all_zombies == ulwp)
			udp->all_zombies = ulwp->ul_forw;
		if (udp->all_zombies == ulwp)
			udp->all_zombies = NULL;
		else {
			ulwp->ul_forw->ul_back = ulwp->ul_back;
			ulwp->ul_back->ul_forw = ulwp->ul_forw;
		}
		ulwp->ul_forw = ulwp->ul_back = NULL;
		udp->nzombies--;
		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
		/*
		 * We can't call ulwp_unlock(ulwp) after we set
		 * ulwp->ul_ix = -1 so we have to get a pointer to the
		 * ulwp's hash table mutex now in order to unlock it below.
		 */
		mp = ulwp_mutex(ulwp, udp);
		ulwp->ul_lwpid = (lwpid_t)(-1);
		ulwp->ul_ix = -1;
		/* capture what we need from the ulwp before dropping its lock */
		rval = ulwp->ul_rval;
		replace = ulwp->ul_replace;
		lmutex_unlock(mp);
		if (replace) {
			/* append ulwp to the tail of the ulwp_replace_free list */
			ulwp->ul_next = NULL;
			if (udp->ulwp_replace_free == NULL)
				udp->ulwp_replace_free =
				    udp->ulwp_replace_last = ulwp;
			else {
				udp->ulwp_replace_last->ul_next = ulwp;
				udp->ulwp_replace_last = ulwp;
			}
		}
		lmutex_unlock(&udp->link_lock);
	}

	if (departed != NULL)
		*departed = found;
	if (status != NULL)
		*status = rval;
	return (0);
}
1074 
1075 int
1076 thr_join(thread_t tid, thread_t *departed, void **status)
1077 {
1078 	int error = _thrp_join(tid, departed, status, 1);
1079 	return ((error == EINVAL)? ESRCH : error);
1080 }
1081 
/*
 * POSIX pthread_join().  Compared with Solaris thr_join():
 *  - there is no "departed" argument, so the id of the joined
 *    thread is not returned;
 *  - a tid of zero is rejected with ESRCH instead of being
 *    handed to _thrp_join();
 *  - EINVAL (tid refers to a detached thread) is returned as is.
 */
#pragma weak _pthread_join = pthread_join
int
pthread_join(pthread_t tid, void **status)
{
	if (tid == 0)
		return (ESRCH);
	return (_thrp_join(tid, NULL, status, 1));
}
1094 
1095 int
1096 pthread_detach(pthread_t tid)
1097 {
1098 	uberdata_t *udp = curthread->ul_uberdata;
1099 	ulwp_t *ulwp;
1100 	ulwp_t **ulwpp;
1101 	int error = 0;
1102 
1103 	if ((ulwpp = find_lwpp(tid)) == NULL)
1104 		return (ESRCH);
1105 	ulwp = *ulwpp;
1106 
1107 	if (ulwp->ul_dead) {
1108 		ulwp_unlock(ulwp, udp);
1109 		error = _thrp_join(tid, NULL, NULL, 0);
1110 	} else {
1111 		error = __lwp_detach(tid);
1112 		ulwp->ul_detached = 1;
1113 		ulwp->ul_usropts |= THR_DETACHED;
1114 		ulwp_unlock(ulwp, udp);
1115 	}
1116 	return (error);
1117 }
1118 
/*
 * Test whether the environment string 'ev' has the form "<match>=...".
 * Returns a pointer to the first character after the '=' when it does,
 * or NULL when the name differs or is not followed directly by '='.
 */
static const char *
ematch(const char *ev, const char *match)
{
	for (; *match != '\0'; match++, ev++) {
		if (*ev != *match)
			return (NULL);
	}
	if (*ev != '=')
		return (NULL);
	return (ev + 1);
}
1132 
/*
 * Parse the environment string 'ev' as "<match>=<decimal number>".
 *
 * Returns:
 *	-1	if the name does not match (see ematch()) or the value
 *		contains a non-digit character before the limit is reached
 *	limit	as soon as the accumulated value exceeds 'limit'; any
 *		remaining characters are not examined
 *	value	otherwise (an empty value yields 0)
 *
 * Each byte is read as an unsigned char before being handed to
 * isdigit(): passing a negative value (a plain char with the high
 * bit set) to a <ctype.h> function is undefined behavior.
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *ename;

	if ((ename = ematch(ev, match)) != NULL) {
		int c;
		for (val = 0; (c = (unsigned char)*ename) != '\0'; ename++) {
			if (!isdigit(c)) {
				val = -1;
				break;
			}
			val = val * 10 + (c - '0');
			if (val > limit) {
				/* clamp and stop before val can overflow */
				val = limit;
				break;
			}
		}
	}
	return (val);
}
1155 
1156 static void
1157 etest(const char *ev)
1158 {
1159 	int value;
1160 
1161 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1162 		thread_queue_spin = value;
1163 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1164 		thread_adaptive_spin = value;
1165 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1166 		thread_max_spinners = value;
1167 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1168 		thread_queue_fifo = value;
1169 #if defined(THREAD_DEBUG)
1170 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1171 		thread_queue_verify = value;
1172 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1173 		thread_queue_dump = value;
1174 #endif
1175 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1176 		thread_stack_cache = value;
1177 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1178 		thread_cond_wait_defer = value;
1179 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1180 		thread_error_detection = value;
1181 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1182 		thread_async_safe = value;
1183 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1184 		thread_door_noreserve = value;
1185 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1186 		thread_locks_misaligned = value;
1187 }
1188 
/*
 * Walk the environment and hand every variable whose name starts
 * with "_THREAD_" to etest(), with the prefix removed.
 * For compatibility with the past, names starting with
 * "LIBTHREAD_" are treated the same way.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* cheap first-character test before the full compare */
		switch (entry[0]) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1212 
/* PROBE_SUPPORT begin */
/*
 * Declared weak: libc_init() compares this symbol against NULL
 * and calls it only when it is present.
 */
#pragma weak __tnf_probe_notify
extern void __tnf_probe_notify(void);
/* PROBE_SUPPORT end */

/* same as atexit() but private to the library */
extern int _atexit(void (*)(void));

/* same as _cleanup() but private to the library */
extern void __cleanup(void);

/* called from libc_init() on every link map for atfork() processing */
extern void atfork_init(void);

#ifdef __amd64
/*
 * Called first thing in libc_init() to gather cache layout information
 * for the optimized assembler string and memory functions.
 */
extern void __proc64id(void);
#endif
1229 
1230 static void
1231 init_auxv_data(uberdata_t *udp)
1232 {
1233 	Dl_argsinfo_t args;
1234 
1235 	udp->ub_comm_page = NULL;
1236 	if (dlinfo(RTLD_SELF, RTLD_DI_ARGSINFO, &args) < 0)
1237 		return;
1238 
1239 	while (args.dla_auxv->a_type != AT_NULL) {
1240 		if (args.dla_auxv->a_type == AT_SUN_COMMPAGE) {
1241 			udp->ub_comm_page = args.dla_auxv->a_un.a_ptr;
1242 		}
1243 		args.dla_auxv++;
1244 	}
1245 }
1246 
/*
 * libc_init() is called by ld.so.1 for library initialization.
 * We perform minimal initialization; enough to work with the main thread.
 *
 * Outline:
 *  - Work required on every link map (__cleanup() registration,
 *    aux vector scan) is done first.
 *  - If a thread structure already exists (oldself) and either it
 *    belongs to the primary link map or we are not the primary link
 *    map, only per-link-map setup is done and we return early.
 *  - Otherwise a ulwp_t is allocated and filled in for the main thread,
 *    inheriting state from oldself when there is one, and is installed
 *    as curthread.
 *  - After curthread is established, the rest of the library state
 *    (mutexes, signals, environment tunables, TLS, ...) is initialized.
 */
void
libc_init(void)
{
	uberdata_t *udp = &__uberdata;
	ulwp_t *oldself = __curthread();
	ucontext_t uc;
	ulwp_t *self;
	struct rlimit rl;
	caddr_t data;
	size_t tls_size;
	int setmask;

	/*
	 * For the initial stage of initialization, we must be careful
	 * not to call any function that could possibly call _cerror().
	 * For this purpose, we call only the raw system call wrappers.
	 */

#ifdef __amd64
	/*
	 * Gather information about cache layouts for optimized
	 * AMD and Intel assembler strfoo() and memfoo() functions.
	 */
	__proc64id();
#endif

	/*
	 * Every libc, regardless of which link map, must register __cleanup().
	 */
	(void) _atexit(__cleanup);

	/*
	 * Every libc, regardless of link map, needs to go through and check
	 * its aux vectors.  Doing so will indicate whether or not this has
	 * been given a comm page (to optimize certain system actions).
	 */
	init_auxv_data(udp);

	/*
	 * We keep our uberdata on one of (a) the first alternate link map
	 * or (b) the primary link map.  We switch to the primary link map
	 * and stay there once we see it.  All intermediate link maps are
	 * subject to being unloaded at any time.
	 */
	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
		mutex_setup();
		atfork_init();	/* every link map needs atfork() processing */
		init_progname();
		return;
	}

	/*
	 * To establish the main stack information, we have to get our context.
	 * This is also convenient to use for getting our signal mask.
	 */
	uc.uc_flags = UC_ALL;
	(void) __getcontext(&uc);
	ASSERT(uc.uc_link == NULL);

	/*
	 * The static TLS block is allocated together with, and immediately
	 * below, the ulwp_t: 'self' points tls_size bytes into the allocation.
	 */
	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
	ASSERT(primary_link_map || tls_size == 0);
	data = lmalloc(sizeof (ulwp_t) + tls_size);
	if (data == NULL)
		thr_panic("cannot allocate thread structure for main thread");
	/* LINTED pointer cast may result in improper alignment */
	self = (ulwp_t *)(data + tls_size);
	init_hash_table[0].hash_bucket = self;

	self->ul_sigmask = uc.uc_sigmask;
	delete_reserved_signals(&self->ul_sigmask);
	/*
	 * Are the old and new sets different?
	 * (This can happen if we are currently blocking SIGCANCEL.)
	 * If so, we must explicitly set our signal mask, below.
	 */
	setmask =
	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));

#ifdef __sparc
	/*
	 * We cache several instructions in the thread structure for use
	 * by the fasttrap DTrace provider. When changing this, read the
	 * comment in fasttrap.h for all the other places that must
	 * be changed.
	 */
	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
#endif

	/*
	 * The stack top comes from the context; the stack size is taken
	 * to be the current RLIMIT_STACK soft limit.
	 */
	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
	(void) getrlimit(RLIMIT_STACK, &rl);
	self->ul_stksiz = rl.rlim_cur;
	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);

	self->ul_forw = self->ul_back = self;
	self->ul_hash = NULL;
	self->ul_ix = 0;
	self->ul_lwpid = 1; /* _lwp_self() */
	self->ul_main = 1;
	self->ul_self = self;
	self->ul_policy = -1;		/* initialize only when needed */
	self->ul_pri = 0;
	self->ul_cid = 0;
	self->ul_rtclassid = -1;
	self->ul_uberdata = udp;
	if (oldself != NULL) {
		int i;

		ASSERT(primary_link_map);
		ASSERT(oldself->ul_main == 1);
		/* carry over thread-specific data and TLS from oldself */
		self->ul_stsd = oldself->ul_stsd;
		for (i = 0; i < TSD_NFAST; i++)
			self->ul_ftsd[i] = oldself->ul_ftsd[i];
		self->ul_tls = oldself->ul_tls;
		/*
		 * Retrieve all pointers to uberdata allocated
		 * while running on previous link maps.
		 * We would like to do a structure assignment here, but
		 * gcc turns structure assignments into calls to memcpy(),
		 * a function exported from libc.  We can't call any such
		 * external functions until we establish curthread, below,
		 * so we just call our private version of memcpy().
		 */
		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
		/*
		 * These items point to global data on the primary link map.
		 */
		udp->thr_hash_table = init_hash_table;
		udp->sigacthandler = sigacthandler;
		udp->tdb.tdb_events = tdb_events;
		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
		ASSERT(udp->lwp_stacks == NULL);
		ASSERT(udp->ulwp_freelist == NULL);
		ASSERT(udp->ulwp_replace_free == NULL);
		ASSERT(udp->hash_size == 1);
	}
	udp->all_lwps = self;
	udp->ulwp_one = self;
	udp->pid = getpid();
	udp->nthreads = 1;
	/*
	 * In every link map, tdb_bootstrap points to the same piece of
	 * allocated memory.  When the primary link map is initialized,
	 * the allocated memory is assigned a pointer to the one true
	 * uberdata.  This allows libc_db to initialize itself regardless
	 * of which instance of libc it finds in the address space.
	 */
	if (udp->tdb_bootstrap == NULL)
		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
	__tdb_bootstrap = udp->tdb_bootstrap;
	if (primary_link_map) {
		self->ul_primarymap = 1;
		udp->primary_map = 1;
		*udp->tdb_bootstrap = udp;
	}
	/*
	 * Cancellation can't happen until:
	 *	pthread_cancel() is called
	 * or:
	 *	another thread is created
	 * For now, as a single-threaded process, set the flag that tells
	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
	 */
	self->ul_nocancel = 1;

#if defined(__amd64)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
#elif defined(__i386)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
#endif	/* __i386 || __amd64 */
	set_curthread(self);		/* redundant on i386 */
	/*
	 * Now curthread is established and it is safe to call any
	 * function in libc except one that uses thread-local storage.
	 */
	self->ul_errnop = &errno;
	if (oldself != NULL) {
		/* tls_size was zero when oldself was allocated */
		lfree(oldself, sizeof (ulwp_t));
	}
	mutex_setup();
	atfork_init();
	signal_init();

	/*
	 * If the stack is unlimited, we set the size to zero to disable
	 * stack checking.
	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
	 */
	if (self->ul_stksiz == RLIM_INFINITY) {
		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
		self->ul_ustack.ss_size = 0;
	} else {
		self->ul_ustack.ss_sp = self->ul_stk;
		self->ul_ustack.ss_size = self->ul_stksiz;
	}
	self->ul_ustack.ss_flags = 0;
	(void) setustack(&self->ul_ustack);

	/*
	 * Get the variables that affect thread behavior from the environment.
	 */
	set_thread_vars();
	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
	udp->thread_stack_cache = thread_stack_cache;

	/*
	 * Make per-thread copies of global variables, for speed.
	 */
	self->ul_queue_fifo = (char)thread_queue_fifo;
	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
	self->ul_error_detection = (char)thread_error_detection;
	self->ul_async_safe = (char)thread_async_safe;
	self->ul_door_noreserve = (char)thread_door_noreserve;
	self->ul_misaligned = (char)thread_locks_misaligned;
	self->ul_max_spinners = (uint8_t)thread_max_spinners;
	self->ul_adaptive_spin = thread_adaptive_spin;
	self->ul_queue_spin = thread_queue_spin;

#if defined(__sparc) && !defined(_LP64)
	if (self->ul_misaligned) {
		/*
		 * Tell the kernel to fix up ldx/stx instructions that
		 * refer to non-8-byte aligned data instead of giving
		 * the process an alignment trap and generating SIGBUS.
		 *
		 * Programs compiled for 32-bit sparc with the Studio SS12
		 * compiler get this done for them automatically (in _init()).
		 * We do it here for the benefit of programs compiled with
		 * other compilers, like gcc.
		 *
		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
		 * environment variable horrible hack to work.
		 */
		extern void _do_fix_align(void);
		_do_fix_align();
	}
#endif

	/*
	 * When we have initialized the primary link map, inform
	 * the dynamic linker about our interface functions.
	 * Set up our pointer to the program name.
	 */
	if (self->ul_primarymap)
		_ld_libc((void *)rtld_funcs);
	init_progname();

	/*
	 * Defer signals until TLS constructors have been called.
	 */
	sigoff(self);
	tls_setup();
	sigon(self);
	/* the mask computed above differs from the kernel's; install ours */
	if (setmask)
		(void) restore_signals(self);

	/*
	 * Make private copies of __xpg4 and __xpg6 so libc can test
	 * them after this point without invoking the dynamic linker.
	 */
	libc__xpg4 = __xpg4;
	libc__xpg6 = __xpg6;

	/* PROBE_SUPPORT begin */
	if (self->ul_primarymap && __tnf_probe_notify != NULL)
		__tnf_probe_notify();
	/* PROBE_SUPPORT end */

	init_sigev_thread();
	init_aio();

	/*
	 * We need to reset __threaded dynamically at runtime, so that
	 * __threaded can be bound to __threaded outside libc which may not
	 * have initial value of 1 (without a copy relocation in a.out).
	 */
	__threaded = 1;
}
1536 
1537 #pragma fini(libc_fini)
1538 void
1539 libc_fini()
1540 {
1541 	/*
1542 	 * If we are doing fini processing for the instance of libc
1543 	 * on the first alternate link map (this happens only when
1544 	 * the dynamic linker rejects a bad audit library), then clear
1545 	 * __curthread().  We abandon whatever memory was allocated by
1546 	 * lmalloc() while running on this alternate link-map but we
1547 	 * don't care (and can't find the memory in any case); we just
1548 	 * want to protect the application from this bad audit library.
1549 	 * No fini processing is done by libc in the normal case.
1550 	 */
1551 
1552 	uberdata_t *udp = curthread->ul_uberdata;
1553 
1554 	if (udp->primary_map == 0 && udp == &__uberdata)
1555 		set_curthread(NULL);
1556 }
1557 
/*
 * finish_init is called when we are about to become multi-threaded,
 * that is, on the first call to thr_create().
 *
 * It completes the setup that libc_init() left out: scheduling
 * parameters for the calling thread, the sleep queue array, the
 * full-size thread hash table (replacing the one-entry table in use
 * until now), the SIGCANCEL handler, and the at-exit hooks.
 * Failure to allocate the hash table is fatal (thr_panic()).
 */
void
finish_init()
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	thr_hash_table_t *htp;
	void *data;
	int i;

	/*
	 * No locks needed here; we are single-threaded on the first call.
	 * We can be called only after the primary link map has been set up.
	 */
	ASSERT(self->ul_primarymap);
	ASSERT(self == udp->ulwp_one);
	ASSERT(!udp->uberflags.uf_mt);
	ASSERT(udp->hash_size == 1);

	/*
	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
	 */
	update_sched(self);

	/*
	 * Allocate the queue_head array if not already allocated.
	 */
	if (udp->queue_head == NULL)
		queue_alloc();

	/*
	 * Now allocate the thread hash table.
	 */
	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
	    == MAP_FAILED)
		thr_panic("cannot allocate thread hash table");

	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
	udp->hash_size = HASHTBLSZ;
	udp->hash_mask = HASHTBLSZ - 1;

	/* mark the lock and condition variable of every bucket initialized */
	for (i = 0; i < HASHTBLSZ; i++, htp++) {
		htp->hash_lock.mutex_flag = LOCK_INITED;
		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
		htp->hash_cond.cond_magic = COND_MAGIC;
	}
	/* enter ourself into the newly allocated table */
	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);

	/*
	 * Set up the SIGCANCEL handler for threads cancellation.
	 */
	setup_cancelsig(SIGCANCEL);

	/*
	 * Arrange to do special things on exit --
	 * - collect queue statistics from all remaining active threads.
	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
	 * - grab assert_lock to ensure that assertion failures
	 *   and a core dump take precedence over _exit().
	 * (Functions are called in the reverse order of their registration.)
	 */
	(void) _atexit(grab_assert_lock);
#if defined(THREAD_DEBUG)
	(void) _atexit(dump_queue_statistics);
	(void) _atexit(collect_queue_statistics);
#endif
}
1629 
1630 /*
1631  * Used only by postfork1_child(), below.
1632  */
1633 static void
1634 mark_dead_and_buried(ulwp_t *ulwp)
1635 {
1636 	ulwp->ul_dead = 1;
1637 	ulwp->ul_lwpid = (lwpid_t)(-1);
1638 	ulwp->ul_hash = NULL;
1639 	ulwp->ul_ix = -1;
1640 	ulwp->ul_schedctl = NULL;
1641 	ulwp->ul_schedctl_called = NULL;
1642 }
1643 
/*
 * This is called from fork1() in the child.
 * Reset our data structures to reflect one lwp.
 *
 * The calling thread becomes the only entry in the thread hash table
 * and on the all_lwps list; every other thread structure is marked
 * dead, and active ones are freed.
 */
void
postfork1_child()
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_head_t *qp;
	ulwp_t *next;
	ulwp_t *ulwp;
	int i;

	/* daemon threads shouldn't call fork1(), but oh well... */
	self->ul_usropts &= ~THR_DAEMON;
	udp->nthreads = 1;
	udp->ndaemons = 0;
	udp->uberflags.uf_mt = 0;
	__libc_threaded = 0;
	/* empty every hash bucket, then re-enter ourself under our new id */
	for (i = 0; i < udp->hash_size; i++)
		udp->thr_hash_table[i].hash_bucket = NULL;
	self->ul_lwpid = _lwp_self();
	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);

	/*
	 * Some thread in the parent might have been suspended
	 * while holding udp->callout_lock or udp->ld_lock.
	 * Reinitialize the child's copies.
	 */
	(void) mutex_init(&udp->callout_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
	(void) mutex_init(&udp->ld_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);

	/* no one in the child is on a sleep queue; reinitialize */
	if ((qp = udp->queue_head) != NULL) {
		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
		/* the first QHASHSIZE entries are typed MX, the rest CV */
		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
			qp->qh_type = (i < QHASHSIZE)? MX : CV;
			qp->qh_lock.mutex_flag = LOCK_INITED;
			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
			qp->qh_hlist = &qp->qh_def_root;
#if defined(THREAD_DEBUG)
			qp->qh_hlen = 1;
			qp->qh_hmax = 1;
#endif
		}
	}

	/*
	 * Do post-fork1 processing for subsystems that need it.
	 * We need to do this before unmapping all of the abandoned
	 * threads' stacks, below, because the post-fork1 actions
	 * might require access to those stacks.
	 */
	postfork1_child_sigev_aio();
	postfork1_child_sigev_mq();
	postfork1_child_sigev_timer();
	postfork1_child_aio();
	/*
	 * The above subsystems use thread pools, so this action
	 * must be performed after those actions.
	 */
	postfork1_child_tpool();

	/*
	 * All lwps except ourself are gone.  Mark them so.
	 * First mark all of the lwps that have already been freed.
	 * Then mark and free all of the active lwps except ourself.
	 * Since we are single-threaded, no locks are required here.
	 */
	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	/* fetch the successor first; ulwp's links are cleared in the loop */
	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
		next = ulwp->ul_forw;
		ulwp->ul_forw = ulwp->ul_back = NULL;
		mark_dead_and_buried(ulwp);
		tsd_free(ulwp);
		tls_free(ulwp);
		rwl_free(ulwp);
		heldlock_free(ulwp);
		ulwp_free(ulwp);
	}
	self->ul_forw = self->ul_back = udp->all_lwps = self;
	if (self != udp->ulwp_one)
		mark_dead_and_buried(udp->ulwp_one);
	if ((ulwp = udp->all_zombies) != NULL) {
		ASSERT(udp->nzombies != 0);
		do {
			next = ulwp->ul_forw;
			ulwp->ul_forw = ulwp->ul_back = NULL;
			mark_dead_and_buried(ulwp);
			udp->nzombies--;
			if (ulwp->ul_replace) {
				/* append to the tail of ulwp_replace_free */
				ulwp->ul_next = NULL;
				if (udp->ulwp_replace_free == NULL) {
					udp->ulwp_replace_free =
					    udp->ulwp_replace_last = ulwp;
				} else {
					udp->ulwp_replace_last->ul_next = ulwp;
					udp->ulwp_replace_last = ulwp;
				}
			}
		} while ((ulwp = next) != udp->all_zombies);
		ASSERT(udp->nzombies == 0);
		udp->all_zombies = NULL;
		udp->nzombies = 0;
	}
	trim_stack_cache(0);
}
1757 
1758 lwpid_t
1759 lwp_self(void)
1760 {
1761 	return (curthread->ul_lwpid);
1762 }
1763 
1764 #pragma weak _ti_thr_self = thr_self
1765 #pragma weak pthread_self = thr_self
1766 thread_t
1767 thr_self()
1768 {
1769 	return (curthread->ul_lwpid);
1770 }
1771 
1772 int
1773 thr_main()
1774 {
1775 	ulwp_t *self = __curthread();
1776 
1777 	return ((self == NULL)? -1 : self->ul_main);
1778 }
1779 
1780 int
1781 _thrp_cancelled(void)
1782 {
1783 	return (curthread->ul_rval == PTHREAD_CANCELED);
1784 }
1785 
1786 int
1787 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1788 {
1789 	stk->ss_sp = (void *)ulwp->ul_stktop;
1790 	stk->ss_size = ulwp->ul_stksiz;
1791 	stk->ss_flags = 0;
1792 	return (0);
1793 }
1794 
1795 #pragma weak _thr_stksegment = thr_stksegment
1796 int
1797 thr_stksegment(stack_t *stk)
1798 {
1799 	return (_thrp_stksegment(curthread, stk));
1800 }
1801 
/*
 * Continue the lwp belonging to 'ulwp' with _lwp_continue(), and keep
 * doing so for as long as the target is still marked ul_stopping.
 * The loop also ends if _lwp_continue() fails with anything other
 * than EINTR.  Nothing is reported to the caller.
 *
 * The caller must own fork_lock and the target's ulwp mutex
 * (both are asserted).
 */
void
force_continue(ulwp_t *ulwp)
{
#if defined(THREAD_DEBUG)
	/* these are referenced only by the ASSERT()s below */
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
#endif
	int error;
	timespec_t ts;

	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));

	for (;;) {
		error = _lwp_continue(ulwp->ul_lwpid);
		if (error != 0 && error != EINTR)
			break;
		error = 0;
		if (ulwp->ul_stopping) {	/* he is stopping himself */
			ts.tv_sec = 0;		/* give him a chance to run */
			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
			(void) __nanosleep(&ts, NULL);
		}
		/* ul_stopping is deliberately re-read after the sleep */
		if (!ulwp->ul_stopping)		/* he is running now */
			break;			/* so we are done */
		/*
		 * He is marked as being in the process of stopping
		 * himself.  Loop around and continue him again.
		 * He may not have been stopped the first time.
		 */
	}
}
1834 
/*
 * Suspend an lwp with lwp_suspend(), then move it to a safe point,
 * that is, to a point where ul_critical and ul_rtld are both zero.
 * On return, the ulwp_lock() is dropped as with ulwp_unlock().
 * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
 * If we have to drop link_lock, we store 1 through link_dropped.
 * If the lwp exits before it can be suspended, we return ESRCH.
 *
 * 'whystopped' must be exactly one of TSTP_REGULAR, TSTP_MUTATOR or
 * TSTP_FORK.  The caller must own fork_lock and the target's ulwp
 * mutex, and the target must be neither the caller nor already
 * stopped (all asserted).  Returns 0 on success.
 */
int
safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	cond_t *cvp = ulwp_condvar(ulwp, udp);
	mutex_t *mp = ulwp_mutex(ulwp, udp);
	/*
	 * Remember the target's id and hash index now; they are used to
	 * look the thread up again after sleeping in __cond_wait(), below.
	 */
	thread_t tid = ulwp->ul_lwpid;
	int ix = ulwp->ul_ix;
	int error = 0;

	ASSERT(whystopped == TSTP_REGULAR ||
	    whystopped == TSTP_MUTATOR ||
	    whystopped == TSTP_FORK);
	ASSERT(ulwp != self);
	ASSERT(!ulwp->ul_stop);
	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(mp, self));

	if (link_dropped != NULL)
		*link_dropped = 0;

	/*
	 * We must grab the target's spin lock before suspending it.
	 * See the comments below and in _thrp_suspend() for why.
	 */
	spin_lock_set(&ulwp->ul_spinlock);
	(void) ___lwp_suspend(tid);
	spin_lock_clear(&ulwp->ul_spinlock);

top:
	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
	    ulwp->ul_stopping) {
		/* thread is already safe */
		ulwp->ul_stop |= whystopped;
	} else {
		/*
		 * Setting ul_pleasestop causes the target thread to stop
		 * itself in _thrp_suspend(), below, after we drop its lock.
		 * We must continue the critical thread before dropping
		 * link_lock because the critical thread may be holding
		 * the queue lock for link_lock.  This is delicate.
		 */
		ulwp->ul_pleasestop |= whystopped;
		force_continue(ulwp);
		if (link_dropped != NULL) {
			*link_dropped = 1;
			lmutex_unlock(&udp->link_lock);
			/* be sure to drop link_lock only once */
			link_dropped = NULL;
		}

		/*
		 * The thread may disappear by calling thr_exit() so we
		 * cannot rely on the ulwp pointer after dropping the lock.
		 * Instead, we search the hash table to find it again.
		 * When we return, we may find that the thread has been
		 * continued by some other thread.  The suspend/continue
		 * interfaces are prone to such race conditions by design.
		 */
		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
		    (ulwp->ul_pleasestop & whystopped)) {
			(void) __cond_wait(cvp, mp);
			/* ulwp ends up NULL if tid is no longer in the bucket */
			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
			    ulwp != NULL; ulwp = ulwp->ul_hash) {
				if (ulwp->ul_lwpid == tid)
					break;
			}
		}

		if (ulwp == NULL || ulwp->ul_dead)
			error = ESRCH;
		else {
			/*
			 * Do another lwp_suspend() to make sure we don't
			 * return until the target thread is fully stopped
			 * in the kernel.  Don't apply lwp_suspend() until
			 * we know that the target is not holding any
			 * queue locks, that is, that it has completed
			 * ulwp_unlock(self) and has, or at least is
			 * about to, call lwp_suspend() on itself.  We do
			 * this by grabbing the target's spin lock.
			 */
			ASSERT(ulwp->ul_lwpid == tid);
			spin_lock_set(&ulwp->ul_spinlock);
			(void) ___lwp_suspend(tid);
			spin_lock_clear(&ulwp->ul_spinlock);
			/*
			 * If some other thread did a thr_continue()
			 * on the target thread we have to start over.
			 */
			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
				goto top;
		}
	}

	/* wake any waiters on the target's condvar, then drop its lock */
	(void) cond_broadcast(cvp);
	lmutex_unlock(mp);
	return (error);
}
1943 
/*
 * Suspend thread 'tid' for the reason(s) given in 'whystopped', which
 * must be a non-empty combination of TSTP_REGULAR, TSTP_MUTATOR and
 * TSTP_FORK (asserted).
 *
 * Returns:
 *	0	on success
 *	ESRCH	if find_lwp() cannot find the thread, or if
 *		safe_suspend() reports that it exited first
 *	EINVAL	if whystopped is TSTP_MUTATOR and the target
 *		is not marked as a mutator (ul_mutator)
 *
 * A thread that is already stopped just has the new reason added to
 * ul_stop.  Another thread is stopped via safe_suspend(); the calling
 * thread stops itself directly with ___lwp_suspend().
 */
int
_thrp_suspend(thread_t tid, uchar_t whystopped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int error = 0;

	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);

	/*
	 * We can't suspend anyone except ourself while
	 * some other thread is performing a fork.
	 * This also allows only one suspension at a time.
	 */
	if (tid != self->ul_lwpid)
		fork_lock_enter();

	if ((ulwp = find_lwp(tid)) == NULL)
		error = ESRCH;
	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
		ulwp_unlock(ulwp, udp);
		error = EINVAL;
	} else if (ulwp->ul_stop) {	/* already stopped */
		ulwp->ul_stop |= whystopped;
		ulwp_broadcast(ulwp);
		ulwp_unlock(ulwp, udp);
	} else if (ulwp != self) {
		/*
		 * After suspending the other thread, move it out of a
		 * critical section and deal with the schedctl mappings.
		 * safe_suspend() suspends the other thread, calls
		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
		 */
		error = safe_suspend(ulwp, whystopped, NULL);
	} else {
		int schedctl_after_fork = 0;

		/*
		 * We are suspending ourself.  We must not take a signal
		 * until we return from lwp_suspend() and clear ul_stopping.
		 * This is to guard against siglongjmp().
		 */
		enter_critical(self);
		self->ul_sp = stkptr();
		_flush_windows();	/* sparc */
		self->ul_pleasestop = 0;
		self->ul_stop |= whystopped;
		/*
		 * Grab our spin lock before dropping ulwp_mutex(self).
		 * This prevents the suspending thread from applying
		 * lwp_suspend() to us before we emerge from
		 * lmutex_unlock(mp) and have dropped mp's queue lock.
		 */
		spin_lock_set(&self->ul_spinlock);
		self->ul_stopping = 1;
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		/*
		 * From this point until we return from lwp_suspend(),
		 * we must not call any function that might invoke the
		 * dynamic linker, that is, we can only call functions
		 * private to the library.
		 *
		 * Also, this is a nasty race condition for a process
		 * that is undergoing a forkall() operation:
		 * Once we clear our spinlock (below), we are vulnerable
		 * to being suspended by the forkall() thread before
		 * we manage to suspend ourself in ___lwp_suspend().
		 * See safe_suspend() and force_continue().
		 *
		 * To avoid a SIGSEGV due to the disappearance
		 * of the schedctl mappings in the child process,
		 * which can happen in spin_lock_clear() if we
		 * are suspended while we are in the middle of
		 * its call to preempt(), we preemptively clear
		 * our own schedctl pointer before dropping our
		 * spinlock.  We reinstate it, in both the parent
		 * and (if this really is a forkall()) the child.
		 */
		if (whystopped & TSTP_FORK) {
			schedctl_after_fork = 1;
			self->ul_schedctl = NULL;
			self->ul_schedctl_called = &udp->uberflags;
		}
		spin_lock_clear(&self->ul_spinlock);
		(void) ___lwp_suspend(tid);
		/*
		 * Somebody else continued us.
		 * We can't grab ulwp_lock(self)
		 * until after clearing ul_stopping.
		 * force_continue() relies on this.
		 */
		self->ul_stopping = 0;
		self->ul_sp = 0;
		if (schedctl_after_fork) {
			/* reinstate the schedctl state cleared above */
			self->ul_schedctl_called = NULL;
			self->ul_schedctl = NULL;
			(void) setup_schedctl();
		}
		ulwp_lock(self, udp);
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		exit_critical(self);
	}

	/* matches the fork_lock_enter() at the top of the function */
	if (tid != self->ul_lwpid)
		fork_lock_exit();

	return (error);
}
2056 
2057 /*
2058  * Suspend all lwps other than ourself in preparation for fork.
2059  */
2060 void
2061 suspend_fork()
2062 {
2063 	ulwp_t *self = curthread;
2064 	uberdata_t *udp = self->ul_uberdata;
2065 	ulwp_t *ulwp;
2066 	int link_dropped;
2067 
2068 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2069 top:
2070 	lmutex_lock(&udp->link_lock);
2071 
2072 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2073 		ulwp_lock(ulwp, udp);
2074 		if (ulwp->ul_stop) {	/* already stopped */
2075 			ulwp->ul_stop |= TSTP_FORK;
2076 			ulwp_broadcast(ulwp);
2077 			ulwp_unlock(ulwp, udp);
2078 		} else {
2079 			/*
2080 			 * Move the stopped lwp out of a critical section.
2081 			 */
2082 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2083 			    link_dropped)
2084 				goto top;
2085 		}
2086 	}
2087 
2088 	lmutex_unlock(&udp->link_lock);
2089 }
2090 
2091 void
2092 continue_fork(int child)
2093 {
2094 	ulwp_t *self = curthread;
2095 	uberdata_t *udp = self->ul_uberdata;
2096 	ulwp_t *ulwp;
2097 
2098 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2099 
2100 	/*
2101 	 * Clear the schedctl pointers in the child of forkall().
2102 	 */
2103 	if (child) {
2104 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2105 			ulwp->ul_schedctl_called =
2106 			    ulwp->ul_dead? &udp->uberflags : NULL;
2107 			ulwp->ul_schedctl = NULL;
2108 		}
2109 	}
2110 
2111 	/*
2112 	 * Set all lwps that were stopped for fork() running again.
2113 	 */
2114 	lmutex_lock(&udp->link_lock);
2115 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2116 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2117 		lmutex_lock(mp);
2118 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2119 		ulwp->ul_stop &= ~TSTP_FORK;
2120 		ulwp_broadcast(ulwp);
2121 		if (!ulwp->ul_stop)
2122 			force_continue(ulwp);
2123 		lmutex_unlock(mp);
2124 	}
2125 	lmutex_unlock(&udp->link_lock);
2126 }
2127 
2128 int
2129 _thrp_continue(thread_t tid, uchar_t whystopped)
2130 {
2131 	uberdata_t *udp = curthread->ul_uberdata;
2132 	ulwp_t *ulwp;
2133 	mutex_t *mp;
2134 	int error = 0;
2135 
2136 	ASSERT(whystopped == TSTP_REGULAR ||
2137 	    whystopped == TSTP_MUTATOR);
2138 
2139 	/*
2140 	 * We single-thread the entire thread suspend/continue mechanism.
2141 	 */
2142 	fork_lock_enter();
2143 
2144 	if ((ulwp = find_lwp(tid)) == NULL) {
2145 		fork_lock_exit();
2146 		return (ESRCH);
2147 	}
2148 
2149 	mp = ulwp_mutex(ulwp, udp);
2150 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2151 		error = EINVAL;
2152 	} else if (ulwp->ul_stop & whystopped) {
2153 		ulwp->ul_stop &= ~whystopped;
2154 		ulwp_broadcast(ulwp);
2155 		if (!ulwp->ul_stop) {
2156 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2157 				ulwp->ul_sp = 0;
2158 				ulwp->ul_created = 0;
2159 			}
2160 			force_continue(ulwp);
2161 		}
2162 	}
2163 	lmutex_unlock(mp);
2164 
2165 	fork_lock_exit();
2166 	return (error);
2167 }
2168 
2169 int
2170 thr_suspend(thread_t tid)
2171 {
2172 	return (_thrp_suspend(tid, TSTP_REGULAR));
2173 }
2174 
2175 int
2176 thr_continue(thread_t tid)
2177 {
2178 	return (_thrp_continue(tid, TSTP_REGULAR));
2179 }
2180 
/*
 * Public wrapper that simply forwards to yield().
 */
void
thr_yield()
{
	yield();
}
2186 
2187 #pragma weak pthread_kill = thr_kill
2188 #pragma weak _thr_kill = thr_kill
2189 int
2190 thr_kill(thread_t tid, int sig)
2191 {
2192 	if (sig == SIGCANCEL)
2193 		return (EINVAL);
2194 	return (_lwp_kill(tid, sig));
2195 }
2196 
2197 /*
2198  * Exit a critical section, take deferred actions if necessary.
2199  * Called from exit_critical() and from sigon().
2200  */
2201 void
2202 do_exit_critical()
2203 {
2204 	ulwp_t *self = curthread;
2205 	int sig;
2206 
2207 	ASSERT(self->ul_critical == 0);
2208 
2209 	/*
2210 	 * Don't suspend ourself or take a deferred signal while dying
2211 	 * or while executing inside the dynamic linker (ld.so.1).
2212 	 */
2213 	if (self->ul_dead || self->ul_rtld)
2214 		return;
2215 
2216 	while (self->ul_pleasestop ||
2217 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2218 		/*
2219 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2220 		 * by keeping self->ul_critical == 1 here.
2221 		 */
2222 		self->ul_critical++;
2223 		while (self->ul_pleasestop) {
2224 			/*
2225 			 * Guard against suspending ourself while on a sleep
2226 			 * queue.  See the comments in call_user_handler().
2227 			 */
2228 			unsleep_self();
2229 			set_parking_flag(self, 0);
2230 			(void) _thrp_suspend(self->ul_lwpid,
2231 			    self->ul_pleasestop);
2232 		}
2233 		self->ul_critical--;
2234 
2235 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2236 			/*
2237 			 * Clear ul_cursig before proceeding.
2238 			 * This protects us from the dynamic linker's
2239 			 * calls to bind_guard()/bind_clear() in the
2240 			 * event that it is invoked to resolve a symbol
2241 			 * like take_deferred_signal() below.
2242 			 */
2243 			self->ul_cursig = 0;
2244 			take_deferred_signal(sig);
2245 			ASSERT(self->ul_cursig == 0);
2246 		}
2247 	}
2248 	ASSERT(self->ul_critical == 0);
2249 }
2250 
2251 /*
2252  * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
 * (ld.so.1) when it has to do something, like resolve a symbol to be called
2254  * by the application or one of its libraries.  _ti_bind_guard() is called
2255  * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2256  * application.  The dynamic linker gets special dispensation from libc to
2257  * run in a critical region (all signals deferred and no thread suspension
2258  * or forking allowed), and to be immune from cancellation for the duration.
2259  */
2260 int
2261 _ti_bind_guard(int flags)
2262 {
2263 	ulwp_t *self = curthread;
2264 	uberdata_t *udp = self->ul_uberdata;
2265 	int bindflag = (flags & THR_FLG_RTLD);
2266 
2267 	if ((self->ul_bindflags & bindflag) == bindflag)
2268 		return (0);
2269 	self->ul_bindflags |= bindflag;
2270 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2271 		sigoff(self);	/* see no signals while holding ld_lock */
2272 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2273 		(void) mutex_lock(&udp->ld_lock);
2274 	}
2275 	enter_critical(self);
2276 	self->ul_save_state = self->ul_cancel_disabled;
2277 	self->ul_cancel_disabled = 1;
2278 	set_cancel_pending_flag(self, 0);
2279 	return (1);
2280 }
2281 
2282 int
2283 _ti_bind_clear(int flags)
2284 {
2285 	ulwp_t *self = curthread;
2286 	uberdata_t *udp = self->ul_uberdata;
2287 	int bindflag = (flags & THR_FLG_RTLD);
2288 
2289 	if ((self->ul_bindflags & bindflag) == 0)
2290 		return (self->ul_bindflags);
2291 	self->ul_bindflags &= ~bindflag;
2292 	self->ul_cancel_disabled = self->ul_save_state;
2293 	set_cancel_pending_flag(self, 0);
2294 	exit_critical(self);
2295 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2296 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2297 			(void) mutex_unlock(&udp->ld_lock);
2298 			self->ul_rtld--;
2299 			sigon(self);	/* reenable signals */
2300 		}
2301 	}
2302 	return (self->ul_bindflags);
2303 }
2304 
2305 /*
2306  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2307  * a critical region in libc.  Return zero if not, else return non-zero.
2308  */
2309 int
2310 _ti_critical(void)
2311 {
2312 	ulwp_t *self = curthread;
2313 	int level = self->ul_critical;
2314 
2315 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2316 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2317 	return (level - 1);
2318 }
2319 
2320 /*
2321  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2322  * it does in the old libthread (see the comments in cond_wait_queue()).
2323  * Also, signals are deferred at thread startup until TLS constructors
2324  * have all been called, at which time _thrp_setup() calls sigon().
2325  *
2326  * _sigoff() and _sigon() are external consolidation-private interfaces to
2327  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2328  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2329  * (librtc.so) to defer signals during its critical sections (not to be
2330  * confused with libc critical sections [see exit_critical() above]).
2331  */
2332 void
2333 _sigoff(void)
2334 {
2335 	ulwp_t *self = curthread;
2336 
2337 	sigoff(self);
2338 }
2339 
2340 void
2341 _sigon(void)
2342 {
2343 	ulwp_t *self = curthread;
2344 
2345 	ASSERT(self->ul_sigdefer > 0);
2346 	sigon(self);
2347 }
2348 
2349 int
2350 thr_getconcurrency()
2351 {
2352 	return (thr_concurrency);
2353 }
2354 
2355 int
2356 pthread_getconcurrency()
2357 {
2358 	return (pthread_concurrency);
2359 }
2360 
2361 int
2362 thr_setconcurrency(int new_level)
2363 {
2364 	uberdata_t *udp = curthread->ul_uberdata;
2365 
2366 	if (new_level < 0)
2367 		return (EINVAL);
2368 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2369 		return (EAGAIN);
2370 	lmutex_lock(&udp->link_lock);
2371 	if (new_level > thr_concurrency)
2372 		thr_concurrency = new_level;
2373 	lmutex_unlock(&udp->link_lock);
2374 	return (0);
2375 }
2376 
2377 int
2378 pthread_setconcurrency(int new_level)
2379 {
2380 	if (new_level < 0)
2381 		return (EINVAL);
2382 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2383 		return (EAGAIN);
2384 	pthread_concurrency = new_level;
2385 	return (0);
2386 }
2387 
2388 size_t
2389 thr_min_stack(void)
2390 {
2391 	return (MINSTACK);
2392 }
2393 
2394 int
2395 __nthreads(void)
2396 {
2397 	return (curthread->ul_uberdata->nthreads);
2398 }
2399 
2400 /*
2401  * XXX
2402  * The remainder of this file implements the private interfaces to java for
2403  * garbage collection.  It is no longer used, at least by java 1.2.
2404  * It can all go away once all old JVMs have disappeared.
2405  */
2406 
2407 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2408 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2409 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2410 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2411 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2412 
2413 /*
2414  * Get the available register state for the target thread.
2415  * Return non-volatile registers: TRS_NONVOLATILE
2416  */
2417 #pragma weak _thr_getstate = thr_getstate
2418 int
2419 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2420 {
2421 	ulwp_t *self = curthread;
2422 	uberdata_t *udp = self->ul_uberdata;
2423 	ulwp_t **ulwpp;
2424 	ulwp_t *ulwp;
2425 	int error = 0;
2426 	int trs_flag = TRS_LWPID;
2427 
2428 	if (tid == 0 || self->ul_lwpid == tid) {
2429 		ulwp = self;
2430 		ulwp_lock(ulwp, udp);
2431 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2432 		ulwp = *ulwpp;
2433 	} else {
2434 		if (flag)
2435 			*flag = TRS_INVALID;
2436 		return (ESRCH);
2437 	}
2438 
2439 	if (ulwp->ul_dead) {
2440 		trs_flag = TRS_INVALID;
2441 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2442 		error = EINVAL;
2443 		trs_flag = TRS_INVALID;
2444 	} else if (ulwp->ul_stop) {
2445 		trs_flag = TRS_NONVOLATILE;
2446 		getgregs(ulwp, rs);
2447 	}
2448 
2449 	if (flag)
2450 		*flag = trs_flag;
2451 	if (lwp)
2452 		*lwp = tid;
2453 	if (ss != NULL)
2454 		(void) _thrp_stksegment(ulwp, ss);
2455 
2456 	ulwp_unlock(ulwp, udp);
2457 	return (error);
2458 }
2459 
2460 /*
2461  * Set the appropriate register state for the target thread.
2462  * This is not used by java.  It exists solely for the MSTC test suite.
2463  */
2464 #pragma weak _thr_setstate = thr_setstate
2465 int
2466 thr_setstate(thread_t tid, int flag, gregset_t rs)
2467 {
2468 	uberdata_t *udp = curthread->ul_uberdata;
2469 	ulwp_t *ulwp;
2470 	int error = 0;
2471 
2472 	if ((ulwp = find_lwp(tid)) == NULL)
2473 		return (ESRCH);
2474 
2475 	if (!ulwp->ul_stop && !suspendedallmutators)
2476 		error = EINVAL;
2477 	else if (rs != NULL) {
2478 		switch (flag) {
2479 		case TRS_NONVOLATILE:
2480 			/* do /proc stuff here? */
2481 			if (ulwp->ul_stop)
2482 				setgregs(ulwp, rs);
2483 			else
2484 				error = EINVAL;
2485 			break;
2486 		case TRS_LWPID:		/* do /proc stuff here? */
2487 		default:
2488 			error = EINVAL;
2489 			break;
2490 		}
2491 	}
2492 
2493 	ulwp_unlock(ulwp, udp);
2494 	return (error);
2495 }
2496 
2497 int
2498 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2499 {
2500 	extern ssize_t __pread(int, void *, size_t, off_t);
2501 	char buf[100];
2502 	int fd;
2503 
2504 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2505 	(void) strcpy(buf, "/proc/self/lwp/");
2506 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2507 	(void) strcat(buf, "/lwpstatus");
2508 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2509 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2510 			if (sp->pr_flags & PR_STOPPED) {
2511 				(void) __close(fd);
2512 				return (0);
2513 			}
2514 			yield();	/* give him a chance to stop */
2515 		}
2516 		(void) __close(fd);
2517 	}
2518 	return (-1);
2519 }
2520 
2521 int
2522 putlwpregs(thread_t tid, prgregset_t prp)
2523 {
2524 	extern ssize_t __writev(int, const struct iovec *, int);
2525 	char buf[100];
2526 	int fd;
2527 	long dstop_sreg[2];
2528 	long run_null[2];
2529 	iovec_t iov[3];
2530 
2531 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2532 	(void) strcpy(buf, "/proc/self/lwp/");
2533 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2534 	(void) strcat(buf, "/lwpctl");
2535 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2536 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2537 		dstop_sreg[1] = PCSREG;		/* set the registers */
2538 		iov[0].iov_base = (caddr_t)dstop_sreg;
2539 		iov[0].iov_len = sizeof (dstop_sreg);
2540 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2541 		iov[1].iov_len = sizeof (prgregset_t);
2542 		run_null[0] = PCRUN;		/* make it runnable again */
2543 		run_null[1] = 0;
2544 		iov[2].iov_base = (caddr_t)run_null;
2545 		iov[2].iov_len = sizeof (run_null);
2546 		if (__writev(fd, iov, 3) >= 0) {
2547 			(void) __close(fd);
2548 			return (0);
2549 		}
2550 		(void) __close(fd);
2551 	}
2552 	return (-1);
2553 }
2554 
2555 static ulong_t
2556 gettsp_slow(thread_t tid)
2557 {
2558 	char buf[100];
2559 	struct lwpstatus status;
2560 
2561 	if (getlwpstatus(tid, &status) != 0) {
2562 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2563 		(void) strcpy(buf, "__gettsp(");
2564 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2565 		(void) strcat(buf, "): can't read lwpstatus");
2566 		thr_panic(buf);
2567 	}
2568 	return (status.pr_reg[R_SP]);
2569 }
2570 
2571 ulong_t
2572 __gettsp(thread_t tid)
2573 {
2574 	uberdata_t *udp = curthread->ul_uberdata;
2575 	ulwp_t *ulwp;
2576 	ulong_t result;
2577 
2578 	if ((ulwp = find_lwp(tid)) == NULL)
2579 		return (0);
2580 
2581 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2582 		ulwp_unlock(ulwp, udp);
2583 		return (result);
2584 	}
2585 
2586 	result = gettsp_slow(tid);
2587 	ulwp_unlock(ulwp, udp);
2588 	return (result);
2589 }
2590 
2591 /*
2592  * This tells java stack walkers how to find the ucontext
2593  * structure passed to signal handlers.
2594  */
2595 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2596 void
2597 thr_sighndlrinfo(void (**func)(), int *funcsize)
2598 {
2599 	*func = &__sighndlr;
2600 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2601 }
2602 
2603 /*
2604  * Mark a thread a mutator or reset a mutator to being a default,
2605  * non-mutator thread.
2606  */
2607 #pragma weak _thr_setmutator = thr_setmutator
2608 int
2609 thr_setmutator(thread_t tid, int enabled)
2610 {
2611 	ulwp_t *self = curthread;
2612 	uberdata_t *udp = self->ul_uberdata;
2613 	ulwp_t *ulwp;
2614 	int error;
2615 	int cancel_state;
2616 
2617 	enabled = enabled? 1 : 0;
2618 top:
2619 	if (tid == 0) {
2620 		ulwp = self;
2621 		ulwp_lock(ulwp, udp);
2622 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2623 		return (ESRCH);
2624 	}
2625 
2626 	/*
2627 	 * The target thread should be the caller itself or a suspended thread.
2628 	 * This prevents the target from also changing its ul_mutator field.
2629 	 */
2630 	error = 0;
2631 	if (ulwp != self && !ulwp->ul_stop && enabled)
2632 		error = EINVAL;
2633 	else if (ulwp->ul_mutator != enabled) {
2634 		lmutex_lock(&mutatorslock);
2635 		if (mutatorsbarrier) {
2636 			ulwp_unlock(ulwp, udp);
2637 			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2638 			    &cancel_state);
2639 			while (mutatorsbarrier)
2640 				(void) cond_wait(&mutatorscv, &mutatorslock);
2641 			(void) pthread_setcancelstate(cancel_state, NULL);
2642 			lmutex_unlock(&mutatorslock);
2643 			goto top;
2644 		}
2645 		ulwp->ul_mutator = enabled;
2646 		lmutex_unlock(&mutatorslock);
2647 	}
2648 
2649 	ulwp_unlock(ulwp, udp);
2650 	return (error);
2651 }
2652 
2653 /*
2654  * Establish a barrier against new mutators.  Any non-mutator trying
2655  * to become a mutator is suspended until the barrier is removed.
2656  */
2657 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2658 void
2659 thr_mutators_barrier(int enabled)
2660 {
2661 	int oldvalue;
2662 	int cancel_state;
2663 
2664 	lmutex_lock(&mutatorslock);
2665 
2666 	/*
2667 	 * Wait if trying to set the barrier while it is already set.
2668 	 */
2669 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2670 	while (mutatorsbarrier && enabled)
2671 		(void) cond_wait(&mutatorscv, &mutatorslock);
2672 	(void) pthread_setcancelstate(cancel_state, NULL);
2673 
2674 	oldvalue = mutatorsbarrier;
2675 	mutatorsbarrier = enabled;
2676 	/*
2677 	 * Wakeup any blocked non-mutators when barrier is removed.
2678 	 */
2679 	if (oldvalue && !enabled)
2680 		(void) cond_broadcast(&mutatorscv);
2681 	lmutex_unlock(&mutatorslock);
2682 }
2683 
2684 /*
2685  * Suspend the set of all mutators except for the caller.  The list
2686  * of actively running threads is searched and only the mutators
2687  * in this list are suspended.  Actively running non-mutators remain
2688  * running.  Any other thread is suspended.
2689  */
2690 #pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2691 int
2692 thr_suspend_allmutators(void)
2693 {
2694 	ulwp_t *self = curthread;
2695 	uberdata_t *udp = self->ul_uberdata;
2696 	ulwp_t *ulwp;
2697 	int link_dropped;
2698 
2699 	/*
2700 	 * We single-thread the entire thread suspend/continue mechanism.
2701 	 */
2702 	fork_lock_enter();
2703 
2704 top:
2705 	lmutex_lock(&udp->link_lock);
2706 
2707 	if (suspendingallmutators || suspendedallmutators) {
2708 		lmutex_unlock(&udp->link_lock);
2709 		fork_lock_exit();
2710 		return (EINVAL);
2711 	}
2712 	suspendingallmutators = 1;
2713 
2714 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2715 		ulwp_lock(ulwp, udp);
2716 		if (!ulwp->ul_mutator) {
2717 			ulwp_unlock(ulwp, udp);
2718 		} else if (ulwp->ul_stop) {	/* already stopped */
2719 			ulwp->ul_stop |= TSTP_MUTATOR;
2720 			ulwp_broadcast(ulwp);
2721 			ulwp_unlock(ulwp, udp);
2722 		} else {
2723 			/*
2724 			 * Move the stopped lwp out of a critical section.
2725 			 */
2726 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2727 			    link_dropped) {
2728 				suspendingallmutators = 0;
2729 				goto top;
2730 			}
2731 		}
2732 	}
2733 
2734 	suspendedallmutators = 1;
2735 	suspendingallmutators = 0;
2736 	lmutex_unlock(&udp->link_lock);
2737 	fork_lock_exit();
2738 	return (0);
2739 }
2740 
2741 /*
2742  * Suspend the target mutator.  The caller is permitted to suspend
2743  * itself.  If a mutator barrier is enabled, the caller will suspend
2744  * itself as though it had been suspended by thr_suspend_allmutators().
2745  * When the barrier is removed, this thread will be resumed.  Any
2746  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2747  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2748  */
2749 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2750 int
2751 thr_suspend_mutator(thread_t tid)
2752 {
2753 	if (tid == 0)
2754 		tid = curthread->ul_lwpid;
2755 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2756 }
2757 
2758 /*
2759  * Resume the set of all suspended mutators.
2760  */
2761 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2762 int
2763 thr_continue_allmutators()
2764 {
2765 	ulwp_t *self = curthread;
2766 	uberdata_t *udp = self->ul_uberdata;
2767 	ulwp_t *ulwp;
2768 
2769 	/*
2770 	 * We single-thread the entire thread suspend/continue mechanism.
2771 	 */
2772 	fork_lock_enter();
2773 
2774 	lmutex_lock(&udp->link_lock);
2775 	if (!suspendedallmutators) {
2776 		lmutex_unlock(&udp->link_lock);
2777 		fork_lock_exit();
2778 		return (EINVAL);
2779 	}
2780 	suspendedallmutators = 0;
2781 
2782 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2783 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2784 		lmutex_lock(mp);
2785 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2786 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2787 			ulwp_broadcast(ulwp);
2788 			if (!ulwp->ul_stop)
2789 				force_continue(ulwp);
2790 		}
2791 		lmutex_unlock(mp);
2792 	}
2793 
2794 	lmutex_unlock(&udp->link_lock);
2795 	fork_lock_exit();
2796 	return (0);
2797 }
2798 
2799 /*
2800  * Resume a suspended mutator.
2801  */
2802 #pragma weak _thr_continue_mutator = thr_continue_mutator
2803 int
2804 thr_continue_mutator(thread_t tid)
2805 {
2806 	return (_thrp_continue(tid, TSTP_MUTATOR));
2807 }
2808 
2809 #pragma weak _thr_wait_mutator = thr_wait_mutator
2810 int
2811 thr_wait_mutator(thread_t tid, int dontwait)
2812 {
2813 	uberdata_t *udp = curthread->ul_uberdata;
2814 	ulwp_t *ulwp;
2815 	int cancel_state;
2816 	int error = 0;
2817 
2818 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2819 top:
2820 	if ((ulwp = find_lwp(tid)) == NULL) {
2821 		(void) pthread_setcancelstate(cancel_state, NULL);
2822 		return (ESRCH);
2823 	}
2824 
2825 	if (!ulwp->ul_mutator)
2826 		error = EINVAL;
2827 	else if (dontwait) {
2828 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2829 			error = EWOULDBLOCK;
2830 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2831 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2832 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2833 
2834 		(void) cond_wait(cvp, mp);
2835 		(void) lmutex_unlock(mp);
2836 		goto top;
2837 	}
2838 
2839 	ulwp_unlock(ulwp, udp);
2840 	(void) pthread_setcancelstate(cancel_state, NULL);
2841 	return (error);
2842 }
2843 
2844 /* PROBE_SUPPORT begin */
2845 
2846 void
2847 thr_probe_setup(void *data)
2848 {
2849 	curthread->ul_tpdp = data;
2850 }
2851 
2852 static void *
2853 _thread_probe_getfunc()
2854 {
2855 	return (curthread->ul_tpdp);
2856 }
2857 
2858 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2859 
2860 /* ARGSUSED */
2861 void
2862 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2863 {
2864 	/* never called */
2865 }
2866 
2867 /* ARGSUSED */
2868 void
2869 _resume_ret(ulwp_t *oldlwp)
2870 {
2871 	/* never called */
2872 }
2873 
2874 /* PROBE_SUPPORT end */
2875