xref: /titanic_44/usr/src/lib/libc/port/threads/thr.c (revision fc33347812f84907261f6fd501e2409da108b8d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include "lint.h"
27 #include "thr_uberdata.h"
28 #include <pthread.h>
29 #include <procfs.h>
30 #include <sys/uio.h>
31 #include <ctype.h>
32 #include "libc.h"
33 
34 /*
35  * These symbols should not be exported from libc, but
36  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
37  * Also, some older versions of the Studio compiler/debugger
38  * components reference them.  These need to be fixed, too.
39  */
#pragma weak _thr_main = thr_main
#pragma weak _thr_create = thr_create
#pragma weak _thr_join = thr_join
#pragma weak _thr_self = thr_self

/*
 * Discard any errno macro supplied by the headers so that, in this file,
 * the name errno refers to the plain global int declared below.
 */
#undef errno
extern int errno;
47 
48 /*
49  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
50  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
51  * system used it illegally (it is a consolidation private symbol).
52  * To accommodate this and possibly other abusers of the symbol,
53  * we make it always equal to 1 now that libthread has been folded
54  * into libc.  The new __libc_threaded symbol is used to indicate
55  * the new meaning, "more than one thread exists".
56  */
57 int __threaded = 1;		/* always equal to 1 */
58 int __libc_threaded = 0;	/* zero until first thr_create() */
59 
60 /*
61  * thr_concurrency and pthread_concurrency are not used by the library.
62  * They exist solely to hold and return the values set by calls to
63  * thr_setconcurrency() and pthread_setconcurrency().
64  * Because thr_concurrency is affected by the THR_NEW_LWP flag
65  * to thr_create(), thr_concurrency is protected by link_lock.
66  */
67 static	int	thr_concurrency = 1;
68 static	int	pthread_concurrency;
69 
#define	HASHTBLSZ	1024	/* must be a power of two */
/*
 * Map a thread id to its bucket index in (udp)->thr_hash_table by masking
 * it with (udp)->hash_mask.  Both macro arguments are parenthesized so
 * that an expression argument (for example "a | b") is masked as a whole
 * instead of being split up by operator precedence.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
72 
/*
 * Initial allocation, just enough for one lwp.
 * __uberdata.thr_hash_table points at this single-entry table at startup
 * (hash_size 1, hash_mask 0), so every thread id hashes to bucket 0.
 * _thrp_create() calls finish_init() while hash_size is still 1.
 */
#pragma align 64(init_hash_table)
thr_hash_table_t init_hash_table[1] = {
	{ DEFAULTMUTEX, DEFAULTCV, NULL },
};
78 
79 extern const Lc_interface rtld_funcs[];
80 
/*
 * The statically initialized uberdata instance.
 * The weak version is known to libc_db and mdb.
 *
 * This is a positional initializer: the entries must stay in the same
 * order as the members of uberdata_t, and each trailing comment names the
 * member being initialized.  Notable initial values: the thread hash
 * table is the one-entry init_hash_table (hash_size 1, hash_mask 0) and
 * the stack cache limit (thread_stack_cache) is 10.
 */
#pragma weak _uberdata = __uberdata
uberdata_t __uberdata = {
	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
	{ 0, },				/* tdb_hash_lock_stats */
	{ { 0 }, },			/* siguaction[NSIG] */
	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 }},
	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
	0,			/* primary_map */
	0,			/* bucket_init */
	0,			/* pad[0] */
	0,			/* pad[1] */
	{ 0 },			/* uberflags */
	NULL,			/* queue_head */
	init_hash_table,	/* thr_hash_table */
	1,			/* hash_size: size of the hash table */
	0,			/* hash_mask: hash_size - 1 */
	NULL,			/* ulwp_one */
	NULL,			/* all_lwps */
	NULL,			/* all_zombies */
	0,			/* nthreads */
	0,			/* nzombies */
	0,			/* ndaemons */
	0,			/* pid */
	sigacthandler,		/* sigacthandler */
	NULL,			/* lwp_stacks */
	NULL,			/* lwp_laststack */
	0,			/* nfreestack */
	10,			/* thread_stack_cache */
	NULL,			/* ulwp_freelist */
	NULL,			/* ulwp_lastfree */
	NULL,			/* ulwp_replace_free */
	NULL,			/* ulwp_replace_last */
	NULL,			/* atforklist */
	NULL,			/* robustlocks */
	NULL,			/* robustlist */
	NULL,			/* __tdb_bootstrap */
	{			/* tdb */
		NULL,		/* tdb_sync_addr_hash */
		0,		/* tdb_register_count */
		0,		/* tdb_hash_alloc_failed */
		NULL,		/* tdb_sync_addr_free */
		NULL,		/* tdb_sync_addr_last */
		0,		/* tdb_sync_alloc */
		{ 0, 0 },	/* tdb_ev_global_mask */
		tdb_events,	/* tdb_events array */
	},
};
147 
148 /*
149  * The weak version is known to libc_db and mdb.
150  */
151 #pragma weak _tdb_bootstrap = __tdb_bootstrap
152 uberdata_t **__tdb_bootstrap = NULL;
153 
/*
 * Global thread-library settings and their default values.
 * NOTE(review): these appear to be the globals behind the per-thread
 * copies (ul_queue_fifo, ul_cond_wait_defer, ul_error_detection, ...)
 * that _thrp_create() propagates to each new thread; the code that loads
 * them is not visible in this part of the file -- confirm.
 * thread_queue_dump is tested by collect_queue_statistics() when
 * THREAD_DEBUG is defined, and thread_stack_cache has the same default
 * (10) as the initial __uberdata.thread_stack_cache.
 */
int	thread_queue_fifo = 4;
int	thread_queue_dump = 0;
int	thread_cond_wait_defer = 0;
int	thread_error_detection = 0;
int	thread_async_safe = 0;
int	thread_stack_cache = 10;
int	thread_door_noreserve = 0;
int	thread_locks_misaligned = 0;
162 
163 static	ulwp_t	*ulwp_alloc(void);
164 static	void	ulwp_free(ulwp_t *);
165 
166 /*
167  * Insert the lwp into the hash table.
168  */
169 void
170 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
171 {
172 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
173 	udp->thr_hash_table[ix].hash_bucket = ulwp;
174 	ulwp->ul_ix = ix;
175 }
176 
177 void
178 hash_in(ulwp_t *ulwp, uberdata_t *udp)
179 {
180 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
181 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
182 
183 	lmutex_lock(mp);
184 	hash_in_unlocked(ulwp, ix, udp);
185 	lmutex_unlock(mp);
186 }
187 
188 /*
189  * Delete the lwp from the hash table.
190  */
191 void
192 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
193 {
194 	ulwp_t **ulwpp;
195 
196 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
197 	    ulwp != *ulwpp;
198 	    ulwpp = &(*ulwpp)->ul_hash)
199 		;
200 	*ulwpp = ulwp->ul_hash;
201 	ulwp->ul_hash = NULL;
202 	ulwp->ul_ix = -1;
203 }
204 
205 void
206 hash_out(ulwp_t *ulwp, uberdata_t *udp)
207 {
208 	int ix;
209 
210 	if ((ix = ulwp->ul_ix) >= 0) {
211 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
212 
213 		lmutex_lock(mp);
214 		hash_out_unlocked(ulwp, ix, udp);
215 		lmutex_unlock(mp);
216 	}
217 }
218 
219 /*
220  * Retain stack information for thread structures that are being recycled for
221  * new threads.  All other members of the thread structure should be zeroed.
222  */
223 static void
224 ulwp_clean(ulwp_t *ulwp)
225 {
226 	caddr_t stk = ulwp->ul_stk;
227 	size_t mapsiz = ulwp->ul_mapsiz;
228 	size_t guardsize = ulwp->ul_guardsize;
229 	uintptr_t stktop = ulwp->ul_stktop;
230 	size_t stksiz = ulwp->ul_stksiz;
231 
232 	(void) memset(ulwp, 0, sizeof (*ulwp));
233 
234 	ulwp->ul_stk = stk;
235 	ulwp->ul_mapsiz = mapsiz;
236 	ulwp->ul_guardsize = guardsize;
237 	ulwp->ul_stktop = stktop;
238 	ulwp->ul_stksiz = stksiz;
239 }
240 
/*
 * Protection bits passed to mmap() for stacks allocated by find_stack().
 * Zero until find_stack() initializes it on its first call.
 */
static int stackprot;
242 
243 /*
244  * Answer the question, "Is the lwp in question really dead?"
245  * We must inquire of the operating system to be really sure
246  * because the lwp may have called lwp_exit() but it has not
247  * yet completed the exit.
248  */
249 static int
250 dead_and_buried(ulwp_t *ulwp)
251 {
252 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
253 		return (1);
254 	if (ulwp->ul_dead && ulwp->ul_detached &&
255 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
256 		ulwp->ul_lwpid = (lwpid_t)(-1);
257 		return (1);
258 	}
259 	return (0);
260 }
261 
262 /*
263  * Attempt to keep the stack cache within the specified cache limit.
264  */
265 static void
266 trim_stack_cache(int cache_limit)
267 {
268 	ulwp_t *self = curthread;
269 	uberdata_t *udp = self->ul_uberdata;
270 	ulwp_t *prev = NULL;
271 	ulwp_t **ulwpp = &udp->lwp_stacks;
272 	ulwp_t *ulwp;
273 
274 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
275 
276 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
277 		if (dead_and_buried(ulwp)) {
278 			*ulwpp = ulwp->ul_next;
279 			if (ulwp == udp->lwp_laststack)
280 				udp->lwp_laststack = prev;
281 			hash_out(ulwp, udp);
282 			udp->nfreestack--;
283 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
284 			/*
285 			 * Now put the free ulwp on the ulwp freelist.
286 			 */
287 			ulwp->ul_mapsiz = 0;
288 			ulwp->ul_next = NULL;
289 			if (udp->ulwp_freelist == NULL)
290 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
291 			else {
292 				udp->ulwp_lastfree->ul_next = ulwp;
293 				udp->ulwp_lastfree = ulwp;
294 			}
295 		} else {
296 			prev = ulwp;
297 			ulwpp = &ulwp->ul_next;
298 		}
299 	}
300 }
301 
/*
 * Find an unused stack of the requested size
 * or create a new stack of the requested size.
 * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
 * thr_exit() stores 1 in the ul_dead member.
 * thr_join() stores -1 in the ul_lwpid member.
 *
 * stksize is the requested stack size in bytes; 0 selects the default.
 * guardsize is the requested red zone size in bytes; one page of it is
 * assumed to be provided by mmap() itself and is deducted.
 * A cached stack is reused only if both its mapping size and its guard
 * size match exactly and its previous lwp is really gone.
 * NULL is returned if mmap() fails or no ulwp_t can be allocated.
 */
static ulwp_t *
find_stack(size_t stksize, size_t guardsize)
{
	static size_t pagesize = 0;

	uberdata_t *udp = curthread->ul_uberdata;
	size_t mapsize;
	ulwp_t *prev;
	ulwp_t *ulwp;
	ulwp_t **ulwpp;
	void *stk;

	/*
	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
	 * unless overridden by the system's configuration.
	 */
	if (stackprot == 0) {	/* do this once */
		long lprot = _sysconf(_SC_STACK_PROT);
		if (lprot <= 0)
			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
		stackprot = (int)lprot;
	}
	if (pagesize == 0)	/* do this once */
		pagesize = _sysconf(_SC_PAGESIZE);

	/*
	 * One megabyte stacks by default, but subtract off
	 * two pages for the system-created red zones.
	 * Round up a non-zero stack size to a pagesize multiple.
	 * (The "& -pagesize" rounding relies on pagesize being a
	 * power of two.)
	 */
	if (stksize == 0)
		stksize = DEFAULTSTACK - 2 * pagesize;
	else
		stksize = ((stksize + pagesize - 1) & -pagesize);

	/*
	 * Round up the mapping size to a multiple of pagesize.
	 * Note: mmap() provides at least one page of red zone
	 * so we deduct that from the value of guardsize.
	 */
	if (guardsize != 0)
		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
	mapsize = stksize + guardsize;

	/*
	 * Search the cache of previously used stacks, under link_lock,
	 * for an exact match.  prev trails ulwp so that lwp_laststack
	 * can be repaired if the tail entry is the one removed.
	 */
	lmutex_lock(&udp->link_lock);
	for (prev = NULL, ulwpp = &udp->lwp_stacks;
	    (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_next) {
		if (ulwp->ul_mapsiz == mapsize &&
		    ulwp->ul_guardsize == guardsize &&
		    dead_and_buried(ulwp)) {
			/*
			 * The previous lwp is gone; reuse the stack.
			 * Remove the ulwp from the stack list.
			 */
			*ulwpp = ulwp->ul_next;
			ulwp->ul_next = NULL;
			if (ulwp == udp->lwp_laststack)
				udp->lwp_laststack = prev;
			hash_out(ulwp, udp);
			udp->nfreestack--;
			lmutex_unlock(&udp->link_lock);
			/* zero everything except the stack description */
			ulwp_clean(ulwp);
			return (ulwp);
		}
	}

	/*
	 * None of the cached stacks matched our mapping size.
	 * Reduce the stack cache to get rid of possibly
	 * very old stacks that will never be reused.
	 */
	if (udp->nfreestack > udp->thread_stack_cache)
		trim_stack_cache(udp->thread_stack_cache);
	else if (udp->nfreestack > 0)
		trim_stack_cache(udp->nfreestack - 1);
	lmutex_unlock(&udp->link_lock);

	/*
	 * Create a new stack.
	 * Note: ulwp is NULL at this point because the search loop above
	 * only falls through when *ulwpp is NULL; that NULL is what gets
	 * returned if mmap() fails.
	 */
	if ((stk = mmap(NULL, mapsize, stackprot,
	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
		/*
		 * We have allocated our stack.  Now allocate the ulwp.
		 */
		ulwp = ulwp_alloc();
		if (ulwp == NULL)
			(void) munmap(stk, mapsize);
		else {
			ulwp->ul_stk = stk;
			ulwp->ul_mapsiz = mapsize;
			ulwp->ul_guardsize = guardsize;
			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
			ulwp->ul_stksiz = stksize;
			if (guardsize)	/* protect the extra red zone */
				(void) mprotect(stk, guardsize, PROT_NONE);
		}
	}
	return (ulwp);
}
410 
411 /*
412  * Get a ulwp_t structure from the free list or allocate a new one.
413  * Such ulwp_t's do not have a stack allocated by the library.
414  */
415 static ulwp_t *
416 ulwp_alloc(void)
417 {
418 	ulwp_t *self = curthread;
419 	uberdata_t *udp = self->ul_uberdata;
420 	size_t tls_size;
421 	ulwp_t *prev;
422 	ulwp_t *ulwp;
423 	ulwp_t **ulwpp;
424 	caddr_t data;
425 
426 	lmutex_lock(&udp->link_lock);
427 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
428 	    (ulwp = *ulwpp) != NULL;
429 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
430 		if (dead_and_buried(ulwp)) {
431 			*ulwpp = ulwp->ul_next;
432 			ulwp->ul_next = NULL;
433 			if (ulwp == udp->ulwp_lastfree)
434 				udp->ulwp_lastfree = prev;
435 			hash_out(ulwp, udp);
436 			lmutex_unlock(&udp->link_lock);
437 			ulwp_clean(ulwp);
438 			return (ulwp);
439 		}
440 	}
441 	lmutex_unlock(&udp->link_lock);
442 
443 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
444 	data = lmalloc(sizeof (*ulwp) + tls_size);
445 	if (data != NULL) {
446 		/* LINTED pointer cast may result in improper alignment */
447 		ulwp = (ulwp_t *)(data + tls_size);
448 	}
449 	return (ulwp);
450 }
451 
452 /*
453  * Free a ulwp structure.
454  * If there is an associated stack, put it on the stack list and
455  * munmap() previously freed stacks up to the residual cache limit.
456  * Else put it on the ulwp free list and never call lfree() on it.
457  */
458 static void
459 ulwp_free(ulwp_t *ulwp)
460 {
461 	uberdata_t *udp = curthread->ul_uberdata;
462 
463 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
464 	ulwp->ul_next = NULL;
465 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
466 		/*EMPTY*/;
467 	else if (ulwp->ul_mapsiz != 0) {
468 		if (udp->lwp_stacks == NULL)
469 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
470 		else {
471 			udp->lwp_laststack->ul_next = ulwp;
472 			udp->lwp_laststack = ulwp;
473 		}
474 		if (++udp->nfreestack > udp->thread_stack_cache)
475 			trim_stack_cache(udp->thread_stack_cache);
476 	} else {
477 		if (udp->ulwp_freelist == NULL)
478 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
479 		else {
480 			udp->ulwp_lastfree->ul_next = ulwp;
481 			udp->ulwp_lastfree = ulwp;
482 		}
483 	}
484 }
485 
486 /*
487  * Find a named lwp and return a pointer to its hash list location.
488  * On success, returns with the hash lock held.
489  */
490 ulwp_t **
491 find_lwpp(thread_t tid)
492 {
493 	uberdata_t *udp = curthread->ul_uberdata;
494 	int ix = TIDHASH(tid, udp);
495 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
496 	ulwp_t *ulwp;
497 	ulwp_t **ulwpp;
498 
499 	if (tid == 0)
500 		return (NULL);
501 
502 	lmutex_lock(mp);
503 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
504 	    (ulwp = *ulwpp) != NULL;
505 	    ulwpp = &ulwp->ul_hash) {
506 		if (ulwp->ul_lwpid == tid)
507 			return (ulwpp);
508 	}
509 	lmutex_unlock(mp);
510 	return (NULL);
511 }
512 
/*
 * Wake up all lwps waiting on this lwp for some reason.
 * Broadcasts on the condition variable that ulwp_condvar() yields for
 * this lwp.  The caller must own the mutex that ulwp_mutex() yields for
 * it (asserted below).
 */
void
ulwp_broadcast(ulwp_t *ulwp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
}
525 
526 /*
527  * Find a named lwp and return a pointer to it.
528  * Returns with the hash lock held.
529  */
530 ulwp_t *
531 find_lwp(thread_t tid)
532 {
533 	ulwp_t *self = curthread;
534 	uberdata_t *udp = self->ul_uberdata;
535 	ulwp_t *ulwp = NULL;
536 	ulwp_t **ulwpp;
537 
538 	if (self->ul_lwpid == tid) {
539 		ulwp = self;
540 		ulwp_lock(ulwp, udp);
541 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
542 		ulwp = *ulwpp;
543 	}
544 
545 	if (ulwp && ulwp->ul_dead) {
546 		ulwp_unlock(ulwp, udp);
547 		ulwp = NULL;
548 	}
549 
550 	return (ulwp);
551 }
552 
553 int
554 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
555 	long flags, thread_t *new_thread, size_t guardsize)
556 {
557 	ulwp_t *self = curthread;
558 	uberdata_t *udp = self->ul_uberdata;
559 	ucontext_t uc;
560 	uint_t lwp_flags;
561 	thread_t tid;
562 	int error;
563 	ulwp_t *ulwp;
564 
565 	/*
566 	 * Enforce the restriction of not creating any threads
567 	 * until the primary link map has been initialized.
568 	 * Also, disallow thread creation to a child of vfork().
569 	 */
570 	if (!self->ul_primarymap || self->ul_vfork)
571 		return (ENOTSUP);
572 
573 	if (udp->hash_size == 1)
574 		finish_init();
575 
576 	if ((stk || stksize) && stksize < MINSTACK)
577 		return (EINVAL);
578 
579 	if (stk == NULL) {
580 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
581 			return (ENOMEM);
582 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
583 	} else {
584 		/* initialize the private stack */
585 		if ((ulwp = ulwp_alloc()) == NULL)
586 			return (ENOMEM);
587 		ulwp->ul_stk = stk;
588 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
589 		ulwp->ul_stksiz = stksize;
590 	}
591 	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
592 	ulwp->ul_ix = -1;
593 	ulwp->ul_errnop = &ulwp->ul_errno;
594 
595 	lwp_flags = LWP_SUSPENDED;
596 	if (flags & (THR_DETACHED|THR_DAEMON)) {
597 		flags |= THR_DETACHED;
598 		lwp_flags |= LWP_DETACHED;
599 	}
600 	if (flags & THR_DAEMON)
601 		lwp_flags |= LWP_DAEMON;
602 
603 	/* creating a thread: enforce mt-correctness in mutex_lock() */
604 	self->ul_async_safe = 1;
605 
606 	/* per-thread copies of global variables, for speed */
607 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
608 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
609 	ulwp->ul_error_detection = self->ul_error_detection;
610 	ulwp->ul_async_safe = self->ul_async_safe;
611 	ulwp->ul_max_spinners = self->ul_max_spinners;
612 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
613 	ulwp->ul_queue_spin = self->ul_queue_spin;
614 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
615 	ulwp->ul_misaligned = self->ul_misaligned;
616 
617 	/* new thread inherits creating thread's scheduling parameters */
618 	ulwp->ul_policy = self->ul_policy;
619 	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
620 	ulwp->ul_cid = self->ul_cid;
621 	ulwp->ul_rtclassid = self->ul_rtclassid;
622 
623 	ulwp->ul_primarymap = self->ul_primarymap;
624 	ulwp->ul_self = ulwp;
625 	ulwp->ul_uberdata = udp;
626 
627 	/* debugger support */
628 	ulwp->ul_usropts = flags;
629 
630 #ifdef __sparc
631 	/*
632 	 * We cache several instructions in the thread structure for use
633 	 * by the fasttrap DTrace provider. When changing this, read the
634 	 * comment in fasttrap.h for the all the other places that must
635 	 * be changed.
636 	 */
637 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
638 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
639 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
640 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
641 #endif
642 
643 	ulwp->ul_startpc = func;
644 	ulwp->ul_startarg = arg;
645 	_fpinherit(ulwp);
646 	/*
647 	 * Defer signals on the new thread until its TLS constructors
648 	 * have been called.  _thrp_setup() will call sigon() after
649 	 * it has called tls_setup().
650 	 */
651 	ulwp->ul_sigdefer = 1;
652 
653 	error = setup_context(&uc, _thrp_setup, ulwp,
654 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
655 	if (error != 0 && stk != NULL)	/* inaccessible stack */
656 		error = EFAULT;
657 
658 	/*
659 	 * Call enter_critical() to avoid being suspended until we
660 	 * have linked the new thread into the proper lists.
661 	 * This is necessary because forkall() and fork1() must
662 	 * suspend all threads and they must see a complete list.
663 	 */
664 	enter_critical(self);
665 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
666 	if (error != 0 ||
667 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
668 		exit_critical(self);
669 		ulwp->ul_lwpid = (lwpid_t)(-1);
670 		ulwp->ul_dead = 1;
671 		ulwp->ul_detached = 1;
672 		lmutex_lock(&udp->link_lock);
673 		ulwp_free(ulwp);
674 		lmutex_unlock(&udp->link_lock);
675 		return (error);
676 	}
677 	self->ul_nocancel = 0;	/* cancellation is now possible */
678 	udp->uberflags.uf_mt = 1;
679 	if (new_thread)
680 		*new_thread = tid;
681 	if (flags & THR_DETACHED)
682 		ulwp->ul_detached = 1;
683 	ulwp->ul_lwpid = tid;
684 	ulwp->ul_stop = TSTP_REGULAR;
685 	if (flags & THR_SUSPENDED)
686 		ulwp->ul_created = 1;
687 
688 	lmutex_lock(&udp->link_lock);
689 	ulwp->ul_forw = udp->all_lwps;
690 	ulwp->ul_back = udp->all_lwps->ul_back;
691 	ulwp->ul_back->ul_forw = ulwp;
692 	ulwp->ul_forw->ul_back = ulwp;
693 	hash_in(ulwp, udp);
694 	udp->nthreads++;
695 	if (flags & THR_DAEMON)
696 		udp->ndaemons++;
697 	if (flags & THR_NEW_LWP)
698 		thr_concurrency++;
699 	__libc_threaded = 1;		/* inform stdio */
700 	lmutex_unlock(&udp->link_lock);
701 
702 	if (__td_event_report(self, TD_CREATE, udp)) {
703 		self->ul_td_evbuf.eventnum = TD_CREATE;
704 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
705 		tdb_event(TD_CREATE, udp);
706 	}
707 
708 	exit_critical(self);
709 
710 	if (!(flags & THR_SUSPENDED))
711 		(void) _thrp_continue(tid, TSTP_REGULAR);
712 
713 	return (0);
714 }
715 
/*
 * Solaris threads interface for creating a thread.
 * Passes all arguments through to _thrp_create() with a guardsize of 0
 * and returns its result (0 or an error number).
 */
int
thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
	long flags, thread_t *new_thread)
{
	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
}
722 
/*
 * A special cancellation cleanup hook for DCE.
 * cleanuphndlr, when it is not NULL, will contain a callback
 * function to be called before a thread is terminated in
 * thr_exit() as a result of being cancelled.
 * It is invoked from _thrp_exit_common() when the exiting thread had a
 * cancellation pending and its exit status is PTHREAD_CANCELED.
 */
static void (*cleanuphndlr)(void) = NULL;

/*
 * _pthread_setcleanupinit: sets the cleanup hook.
 * func replaces any previously installed hook; NULL removes it.
 * The store is a plain assignment with no locking.  Always returns 0.
 */
int
_pthread_setcleanupinit(void (*func)(void))
{
	cleanuphndlr = func;
	return (0);
}
740 
/*
 * Final stage of thread exit for the calling thread.  Does not return:
 * it ends in either exit(0) (last non-daemon thread) or _lwp_terminate().
 *
 * In order, it: reports TD_DEATH to a debugger if requested; updates the
 * thread counts under link_lock; runs the TSD, TLS and held-lock exit
 * processing; blocks signals and suspension; hands the ulwp_t (and any
 * library stack) to ulwp_free(); unlinks the thread from all_lwps; marks
 * it dead; for a joinable thread with a library stack, switches to a
 * smaller replacement ulwp_t so the stack can be reused while thr_join()
 * can still find the thread; puts a joinable thread on all_zombies;
 * wakes waiters; and finally terminates the lwp.
 *
 * Signals must already be deferred on entry (asserted below).
 */
void
_thrp_exit()
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *replace = NULL;

	if (__td_event_report(self, TD_DEATH, udp)) {
		self->ul_td_evbuf.eventnum = TD_DEATH;
		tdb_event(TD_DEATH, udp);
	}

	ASSERT(self->ul_sigdefer != 0);

	lmutex_lock(&udp->link_lock);
	udp->nthreads--;
	if (self->ul_usropts & THR_NEW_LWP)
		thr_concurrency--;
	if (self->ul_usropts & THR_DAEMON)
		udp->ndaemons--;
	else if (udp->nthreads == udp->ndaemons) {
		/*
		 * We are the last non-daemon thread exiting.
		 * Exit the process.  We retain our TSD and TLS so
		 * that atexit() application functions can use them.
		 */
		lmutex_unlock(&udp->link_lock);
		exit(0);
		thr_panic("_thrp_exit(): exit(0) returned");
	}
	lmutex_unlock(&udp->link_lock);

	tsd_exit();		/* deallocate thread-specific data */
	tls_exit();		/* deallocate thread-local storage */
	heldlock_exit();	/* deal with left-over held locks */

	/* block all signals to finish exiting */
	block_all_signals(self);
	/* also prevent ourself from being suspended */
	enter_critical(self);
	rwl_free(self);
	/*
	 * link_lock is held from here until just before _lwp_terminate().
	 * The ulwp_t goes onto a reuse list now, but it cannot actually be
	 * reused until dead_and_buried() sees the lwp gone.
	 */
	lmutex_lock(&udp->link_lock);
	ulwp_free(self);
	(void) ulwp_lock(self, udp);

	if (self->ul_mapsiz && !self->ul_detached) {
		/*
		 * We want to free the stack for reuse but must keep
		 * the ulwp_t struct for the benefit of thr_join().
		 * For this purpose we allocate a replacement ulwp_t.
		 * Take one from the replacement free list if possible;
		 * if lmalloc() fails, replace stays NULL and no
		 * replacement is made.
		 */
		if ((replace = udp->ulwp_replace_free) == NULL)
			replace = lmalloc(REPLACEMENT_SIZE);
		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
			udp->ulwp_replace_last = NULL;
	}

	/*
	 * Remove ourself from the circular all_lwps list.  If we were the
	 * list head, advance it first; if it still points at us we were
	 * the only entry and the list becomes empty.
	 */
	if (udp->all_lwps == self)
		udp->all_lwps = self->ul_forw;
	if (udp->all_lwps == self)
		udp->all_lwps = NULL;
	else {
		self->ul_forw->ul_back = self->ul_back;
		self->ul_back->ul_forw = self->ul_forw;
	}
	self->ul_forw = self->ul_back = NULL;
#if defined(THREAD_DEBUG)
	/* collect queue lock statistics before marking ourself dead */
	record_spin_locks(self);
#endif
	self->ul_dead = 1;
	self->ul_pleasestop = 0;
	if (replace != NULL) {
		int ix = self->ul_ix;		/* the hash index */
		(void) memcpy(replace, self, REPLACEMENT_SIZE);
		replace->ul_self = replace;
		replace->ul_next = NULL;	/* clone not on stack list */
		replace->ul_mapsiz = 0;		/* allows clone to be freed */
		replace->ul_replace = 1;	/* requires clone to be freed */
		/* swap the clone into our place in the hash chain */
		hash_out_unlocked(self, ix, udp);
		hash_in_unlocked(replace, ix, udp);
		ASSERT(!(self->ul_detached));
		self->ul_detached = 1;		/* this frees the stack */
		self->ul_schedctl = NULL;
		self->ul_schedctl_called = &udp->uberflags;
		set_curthread(self = replace);
		/*
		 * Having just changed the address of curthread, we
		 * must reset the ownership of the locks we hold so
		 * that assertions will not fire when we release them.
		 */
		udp->link_lock.mutex_owner = (uintptr_t)self;
		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
		/*
		 * NOTE:
		 * On i386, %gs still references the original, not the
		 * replacement, ulwp structure.  Fetching the replacement
		 * curthread pointer via %gs:0 works correctly since the
		 * original ulwp structure will not be reallocated until
		 * this lwp has completed its lwp_exit() system call (see
		 * dead_and_buried()), but from here on out, we must make
		 * no references to %gs:<offset> other than %gs:0.
		 */
	}
	/*
	 * Put non-detached terminated threads in the all_zombies list.
	 */
	if (!self->ul_detached) {
		udp->nzombies++;
		if (udp->all_zombies == NULL) {
			ASSERT(udp->nzombies == 1);
			udp->all_zombies = self->ul_forw = self->ul_back = self;
		} else {
			self->ul_forw = udp->all_zombies;
			self->ul_back = udp->all_zombies->ul_back;
			self->ul_back->ul_forw = self;
			self->ul_forw->ul_back = self;
		}
	}
	/*
	 * Notify everyone waiting for this thread.
	 */
	ulwp_broadcast(self);
	(void) ulwp_unlock(self, udp);
	/*
	 * Prevent any more references to the schedctl data.
	 * We are exiting and continue_fork() may not find us.
	 * Do this just before dropping link_lock, since fork
	 * serializes on link_lock.
	 */
	self->ul_schedctl = NULL;
	self->ul_schedctl_called = &udp->uberflags;
	lmutex_unlock(&udp->link_lock);

	ASSERT(self->ul_critical == 1);
	ASSERT(self->ul_preempt == 0);
	_lwp_terminate();	/* never returns */
	thr_panic("_thrp_exit(): _lwp_terminate() returned");
}
880 
#if defined(THREAD_DEBUG)
/*
 * Debug-only: when thread_queue_dump is set, call record_spin_locks()
 * once for every thread on the circular all_lwps list, holding
 * link_lock for the duration of the walk.
 */
void
collect_queue_statistics()
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *cur;

	if (!thread_queue_dump)
		return;

	lmutex_lock(&udp->link_lock);
	cur = udp->all_lwps;
	if (cur != NULL) {
		do {
			record_spin_locks(cur);
			cur = cur->ul_forw;
		} while (cur != udp->all_lwps);
	}
	lmutex_unlock(&udp->link_lock);
}
#endif
899 
/*
 * Common entry to thread exit, shared by thr_exit() and _thrp_terminate().
 *
 *	status	the thread's exit value, stored in ul_rval
 *	unwind	non-zero to set ul_unwind before unwinding (see below)
 *
 * Disables cancellation, runs the DCE cleanup hook if the thread was
 * cancelled, blocks and defers signals, records the exit status, and
 * then calls _thrp_unwind(), which eventually reaches _thrp_exit().
 * Never returns.
 */
static void __NORETURN
_thrp_exit_common(void *status, int unwind)
{
	ulwp_t *self = curthread;
	/* must be sampled now: ul_cancel_pending is cleared just below */
	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);

	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);

	/*
	 * Disable cancellation and call the special DCE cancellation
	 * cleanup hook if it is enabled.  Do nothing else before calling
	 * the DCE cancellation cleanup hook; it may call longjmp() and
	 * never return here.
	 */
	self->ul_cancel_disabled = 1;
	self->ul_cancel_async = 0;
	self->ul_save_async = 0;
	self->ul_cancelable = 0;
	self->ul_cancel_pending = 0;
	set_cancel_pending_flag(self, 1);
	if (cancelled && cleanuphndlr != NULL)
		(*cleanuphndlr)();

	/*
	 * Block application signals while we are exiting.
	 * We call out to C++, TSD, and TLS destructors while exiting
	 * and these are application-defined, so we cannot be assured
	 * that they won't reset the signal mask.  We use sigoff() to
	 * defer any signals that may be received as a result of this
	 * bad behavior.  Such signals will be lost to the process
	 * when the thread finishes exiting.
	 */
	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
	sigoff(self);

	self->ul_rval = status;

	/*
	 * If thr_exit is being called from the places where
	 * C++ destructors are to be called such as cancellation
	 * points, then set this flag. It is checked in _t_cancel()
	 * to decide whether _ex_unwind() is to be called or not.
	 */
	if (unwind)
		self->ul_unwind = 1;

	/*
	 * _thrp_unwind() will eventually call _thrp_exit().
	 * It never returns.
	 */
	_thrp_unwind(NULL);
	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");

	for (;;)	/* to shut the compiler up about __NORETURN */
		continue;
}
956 
/*
 * Called when a thread returns from its start function.
 * We are at the top of the stack; no unwinding is necessary.
 * status is the start function's return value and becomes the thread's
 * exit value.  Calls _thrp_exit_common() with unwind == 0, which never
 * returns.
 */
void
_thrp_terminate(void *status)
{
	_thrp_exit_common(status, 0);
}
966 
/*
 * Terminate the calling thread with exit value status.
 * pthread_exit and _thr_exit are weak aliases of this function.
 * Calls _thrp_exit_common() with unwind == 1, which never returns.
 */
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
void
thr_exit(void *status)
{
	_thrp_exit_common(status, 1);
}
974 
/*
 * Common code for joining with a terminated thread.
 *
 *	tid		id passed to the lwp wait call
 *	departed	if not NULL, receives the id of the lwp that was
 *			waited for
 *	status		if not NULL, receives the thread's exit value
 *			(NULL if the lwp is unknown to the library)
 *	do_cancel	non-zero: wait with lwp_wait(); zero: wait with
 *			__lwp_wait(), retrying while it returns EINTR
 *			NOTE(review): presumably lwp_wait() is the
 *			cancellation-point variant -- confirm
 *
 * Returns 0 on success or the error number from the wait call.
 * After a successful wait the thread's ulwp_t is removed from the hash
 * table and from all_zombies, and its ul_lwpid is set to -1 so that
 * dead_and_buried() will let the structure be reused.
 */
int
_thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
{
	uberdata_t *udp = curthread->ul_uberdata;
	mutex_t *mp;
	void *rval;
	thread_t found;
	ulwp_t *ulwp;
	ulwp_t **ulwpp;
	int replace;
	int error;

	if (do_cancel)
		error = lwp_wait(tid, &found);
	else {
		while ((error = __lwp_wait(tid, &found)) == EINTR)
			;
	}
	if (error)
		return (error);

	/*
	 * We must hold link_lock to avoid a race condition with find_stack().
	 */
	lmutex_lock(&udp->link_lock);
	if ((ulwpp = find_lwpp(found)) == NULL) {
		/*
		 * lwp_wait() found an lwp that the library doesn't know
		 * about.  It must have been created with _lwp_create().
		 * Just return its lwpid; we can't know its status.
		 */
		lmutex_unlock(&udp->link_lock);
		rval = NULL;
	} else {
		/*
		 * Remove ulwp from the hash table.
		 * find_lwpp() returned with the bucket's hash lock held,
		 * so the chain can be edited directly through ulwpp.
		 */
		ulwp = *ulwpp;
		*ulwpp = ulwp->ul_hash;
		ulwp->ul_hash = NULL;
		/*
		 * Remove ulwp from all_zombies list.
		 */
		ASSERT(udp->nzombies >= 1);
		if (udp->all_zombies == ulwp)
			udp->all_zombies = ulwp->ul_forw;
		if (udp->all_zombies == ulwp)
			udp->all_zombies = NULL;
		else {
			ulwp->ul_forw->ul_back = ulwp->ul_back;
			ulwp->ul_back->ul_forw = ulwp->ul_forw;
		}
		ulwp->ul_forw = ulwp->ul_back = NULL;
		udp->nzombies--;
		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
		/*
		 * We can't call ulwp_unlock(ulwp) after we set
		 * ulwp->ul_ix = -1 so we have to get a pointer to the
		 * ulwp's hash table mutex now in order to unlock it below.
		 */
		mp = ulwp_mutex(ulwp, udp);
		ulwp->ul_lwpid = (lwpid_t)(-1);
		ulwp->ul_ix = -1;
		rval = ulwp->ul_rval;
		replace = ulwp->ul_replace;
		lmutex_unlock(mp);
		if (replace) {
			/*
			 * This is a replacement ulwp_t made by _thrp_exit();
			 * append it to the replacement free list for reuse.
			 */
			ulwp->ul_next = NULL;
			if (udp->ulwp_replace_free == NULL)
				udp->ulwp_replace_free =
				    udp->ulwp_replace_last = ulwp;
			else {
				udp->ulwp_replace_last->ul_next = ulwp;
				udp->ulwp_replace_last = ulwp;
			}
		}
		lmutex_unlock(&udp->link_lock);
	}

	if (departed != NULL)
		*departed = found;
	if (status != NULL)
		*status = rval;
	return (0);
}
1061 
1062 int
1063 thr_join(thread_t tid, thread_t *departed, void **status)
1064 {
1065 	int error = _thrp_join(tid, departed, status, 1);
1066 	return ((error == EINVAL)? ESRCH : error);
1067 }
1068 
/*
 * POSIX pthread_join().  Compared with Solaris thr_join():
 *  - there is no "departed" argument, so the id of the joined thread
 *    is not reported;
 *  - a tid of zero is rejected with ESRCH;
 *  - EINVAL (tid refers to a detached thread) is passed through
 *    to the caller unchanged.
 */
#pragma weak _pthread_join = pthread_join
int
pthread_join(pthread_t tid, void **status)
{
	if (tid == 0)
		return (ESRCH);
	return (_thrp_join(tid, NULL, status, 1));
}
1081 
1082 int
1083 pthread_detach(pthread_t tid)
1084 {
1085 	uberdata_t *udp = curthread->ul_uberdata;
1086 	ulwp_t *ulwp;
1087 	ulwp_t **ulwpp;
1088 	int error = 0;
1089 
1090 	if ((ulwpp = find_lwpp(tid)) == NULL)
1091 		return (ESRCH);
1092 	ulwp = *ulwpp;
1093 
1094 	if (ulwp->ul_dead) {
1095 		ulwp_unlock(ulwp, udp);
1096 		error = _thrp_join(tid, NULL, NULL, 0);
1097 	} else {
1098 		error = __lwp_detach(tid);
1099 		ulwp->ul_detached = 1;
1100 		ulwp->ul_usropts |= THR_DETACHED;
1101 		ulwp_unlock(ulwp, udp);
1102 	}
1103 	return (error);
1104 }
1105 
/*
 * If the string 'ev' starts with the name 'match' immediately followed
 * by '=', return a pointer to the text after the '='.
 * Otherwise return NULL.
 */
static const char *
ematch(const char *ev, const char *match)
{
	int c;

	while ((c = *match++) != '\0') {
		if (*ev++ != c)
			return (NULL);
	}
	if (*ev++ != '=')
		return (NULL);
	return (ev);
}

/*
 * Parse an environment entry of the form "<match>=<decimal digits>".
 *
 * Returns:
 *	-1	if 'ev' does not name 'match', or if its value contains
 *		anything other than decimal digits
 *	0	if the value is empty
 *	limit	if the value is greater than 'limit'
 *	value	otherwise
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *ename;

	if ((ename = ematch(ev, match)) != NULL) {
		int c;

		/*
		 * Convert through unsigned char: passing a negative
		 * plain-char value to isdigit() is undefined behavior.
		 */
		for (val = 0; (c = (unsigned char)*ename) != '\0'; ename++) {
			int digit;

			if (!isdigit(c)) {
				val = -1;
				break;
			}
			digit = c - '0';
			/*
			 * This is the test "val * 10 + digit > limit",
			 * rearranged so that the arithmetic cannot
			 * overflow an int no matter how large limit is.
			 */
			if (digit > limit || val > (limit - digit) / 10) {
				val = limit;
				break;
			}
			val = val * 10 + digit;
		}
	}
	return (val);
}
1142 
1143 static void
1144 etest(const char *ev)
1145 {
1146 	int value;
1147 
1148 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1149 		thread_queue_spin = value;
1150 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1151 		thread_adaptive_spin = value;
1152 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1153 		thread_max_spinners = value;
1154 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1155 		thread_queue_fifo = value;
1156 #if defined(THREAD_DEBUG)
1157 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1158 		thread_queue_verify = value;
1159 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1160 		thread_queue_dump = value;
1161 #endif
1162 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1163 		thread_stack_cache = value;
1164 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1165 		thread_cond_wait_defer = value;
1166 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1167 		thread_error_detection = value;
1168 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1169 		thread_async_safe = value;
1170 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1171 		thread_door_noreserve = value;
1172 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1173 		thread_locks_misaligned = value;
1174 }
1175 
/*
 * Walk the environment and evaluate every entry whose name begins
 * with "_THREAD_".  For compatibility with the past, names that begin
 * with "LIBTHREAD_" are treated the same way.  The remainder of each
 * such entry is handed to etest().
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* Look at the first character before paying for strncmp(). */
		switch (*entry) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1199 
1200 /* PROBE_SUPPORT begin */
1201 #pragma weak __tnf_probe_notify
1202 extern void __tnf_probe_notify(void);
1203 /* PROBE_SUPPORT end */
1204 
1205 /* same as atexit() but private to the library */
1206 extern int _atexit(void (*)(void));
1207 
1208 /* same as _cleanup() but private to the library */
1209 extern void __cleanup(void);
1210 
1211 extern void atfork_init(void);
1212 
1213 #ifdef __amd64
1214 extern void __proc64id(void);
1215 #endif
1216 
1217 /*
1218  * libc_init() is called by ld.so.1 for library initialization.
1219  * We perform minimal initialization; enough to work with the main thread.
1220  */
1221 void
1222 libc_init(void)
1223 {
1224 	uberdata_t *udp = &__uberdata;
1225 	ulwp_t *oldself = __curthread();
1226 	ucontext_t uc;
1227 	ulwp_t *self;
1228 	struct rlimit rl;
1229 	caddr_t data;
1230 	size_t tls_size;
1231 	int setmask;
1232 
1233 	/*
1234 	 * For the initial stage of initialization, we must be careful
1235 	 * not to call any function that could possibly call _cerror().
1236 	 * For this purpose, we call only the raw system call wrappers.
1237 	 */
1238 
1239 #ifdef __amd64
1240 	/*
1241 	 * Gather information about cache layouts for optimized
1242 	 * AMD and Intel assembler strfoo() and memfoo() functions.
1243 	 */
1244 	__proc64id();
1245 #endif
1246 
1247 	/*
1248 	 * Every libc, regardless of which link map, must register __cleanup().
1249 	 */
1250 	(void) _atexit(__cleanup);
1251 
1252 	/*
1253 	 * We keep our uberdata on one of (a) the first alternate link map
1254 	 * or (b) the primary link map.  We switch to the primary link map
1255 	 * and stay there once we see it.  All intermediate link maps are
1256 	 * subject to being unloaded at any time.
1257 	 */
1258 	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
1259 		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1260 		mutex_setup();
1261 		atfork_init();	/* every link map needs atfork() processing */
1262 		return;
1263 	}
1264 
1265 	/*
1266 	 * To establish the main stack information, we have to get our context.
1267 	 * This is also convenient to use for getting our signal mask.
1268 	 */
1269 	uc.uc_flags = UC_ALL;
1270 	(void) __getcontext(&uc);
1271 	ASSERT(uc.uc_link == NULL);
1272 
1273 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1274 	ASSERT(primary_link_map || tls_size == 0);
1275 	data = lmalloc(sizeof (ulwp_t) + tls_size);
1276 	if (data == NULL)
1277 		thr_panic("cannot allocate thread structure for main thread");
1278 	/* LINTED pointer cast may result in improper alignment */
1279 	self = (ulwp_t *)(data + tls_size);
1280 	init_hash_table[0].hash_bucket = self;
1281 
1282 	self->ul_sigmask = uc.uc_sigmask;
1283 	delete_reserved_signals(&self->ul_sigmask);
1284 	/*
1285 	 * Are the old and new sets different?
1286 	 * (This can happen if we are currently blocking SIGCANCEL.)
1287 	 * If so, we must explicitly set our signal mask, below.
1288 	 */
1289 	setmask =
1290 	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1291 	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
1292 	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
1293 	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));
1294 
1295 #ifdef __sparc
1296 	/*
1297 	 * We cache several instructions in the thread structure for use
1298 	 * by the fasttrap DTrace provider. When changing this, read the
1299 	 * comment in fasttrap.h for the all the other places that must
1300 	 * be changed.
1301 	 */
1302 	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1303 	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1304 	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1305 	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1306 #endif
1307 
1308 	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1309 	(void) getrlimit(RLIMIT_STACK, &rl);
1310 	self->ul_stksiz = rl.rlim_cur;
1311 	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1312 
1313 	self->ul_forw = self->ul_back = self;
1314 	self->ul_hash = NULL;
1315 	self->ul_ix = 0;
1316 	self->ul_lwpid = 1; /* _lwp_self() */
1317 	self->ul_main = 1;
1318 	self->ul_self = self;
1319 	self->ul_policy = -1;		/* initialize only when needed */
1320 	self->ul_pri = 0;
1321 	self->ul_cid = 0;
1322 	self->ul_rtclassid = -1;
1323 	self->ul_uberdata = udp;
1324 	if (oldself != NULL) {
1325 		int i;
1326 
1327 		ASSERT(primary_link_map);
1328 		ASSERT(oldself->ul_main == 1);
1329 		self->ul_stsd = oldself->ul_stsd;
1330 		for (i = 0; i < TSD_NFAST; i++)
1331 			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1332 		self->ul_tls = oldself->ul_tls;
1333 		/*
1334 		 * Retrieve all pointers to uberdata allocated
1335 		 * while running on previous link maps.
1336 		 * We would like to do a structure assignment here, but
1337 		 * gcc turns structure assignments into calls to memcpy(),
1338 		 * a function exported from libc.  We can't call any such
1339 		 * external functions until we establish curthread, below,
1340 		 * so we just call our private version of memcpy().
1341 		 */
1342 		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
1343 		/*
1344 		 * These items point to global data on the primary link map.
1345 		 */
1346 		udp->thr_hash_table = init_hash_table;
1347 		udp->sigacthandler = sigacthandler;
1348 		udp->tdb.tdb_events = tdb_events;
1349 		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1350 		ASSERT(udp->lwp_stacks == NULL);
1351 		ASSERT(udp->ulwp_freelist == NULL);
1352 		ASSERT(udp->ulwp_replace_free == NULL);
1353 		ASSERT(udp->hash_size == 1);
1354 	}
1355 	udp->all_lwps = self;
1356 	udp->ulwp_one = self;
1357 	udp->pid = getpid();
1358 	udp->nthreads = 1;
1359 	/*
1360 	 * In every link map, tdb_bootstrap points to the same piece of
1361 	 * allocated memory.  When the primary link map is initialized,
1362 	 * the allocated memory is assigned a pointer to the one true
1363 	 * uberdata.  This allows libc_db to initialize itself regardless
1364 	 * of which instance of libc it finds in the address space.
1365 	 */
1366 	if (udp->tdb_bootstrap == NULL)
1367 		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1368 	__tdb_bootstrap = udp->tdb_bootstrap;
1369 	if (primary_link_map) {
1370 		self->ul_primarymap = 1;
1371 		udp->primary_map = 1;
1372 		*udp->tdb_bootstrap = udp;
1373 	}
1374 	/*
1375 	 * Cancellation can't happen until:
1376 	 *	pthread_cancel() is called
1377 	 * or:
1378 	 *	another thread is created
1379 	 * For now, as a single-threaded process, set the flag that tells
1380 	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1381 	 */
1382 	self->ul_nocancel = 1;
1383 
1384 #if defined(__amd64)
1385 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1386 #elif defined(__i386)
1387 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1388 #endif	/* __i386 || __amd64 */
1389 	set_curthread(self);		/* redundant on i386 */
1390 	/*
1391 	 * Now curthread is established and it is safe to call any
1392 	 * function in libc except one that uses thread-local storage.
1393 	 */
1394 	self->ul_errnop = &errno;
1395 	if (oldself != NULL) {
1396 		/* tls_size was zero when oldself was allocated */
1397 		lfree(oldself, sizeof (ulwp_t));
1398 	}
1399 	mutex_setup();
1400 	atfork_init();
1401 	signal_init();
1402 
1403 	/*
1404 	 * If the stack is unlimited, we set the size to zero to disable
1405 	 * stack checking.
1406 	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1407 	 */
1408 	if (self->ul_stksiz == RLIM_INFINITY) {
1409 		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1410 		self->ul_ustack.ss_size = 0;
1411 	} else {
1412 		self->ul_ustack.ss_sp = self->ul_stk;
1413 		self->ul_ustack.ss_size = self->ul_stksiz;
1414 	}
1415 	self->ul_ustack.ss_flags = 0;
1416 	(void) setustack(&self->ul_ustack);
1417 
1418 	/*
1419 	 * Get the variables that affect thread behavior from the environment.
1420 	 */
1421 	set_thread_vars();
1422 	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1423 	udp->thread_stack_cache = thread_stack_cache;
1424 
1425 	/*
1426 	 * Make per-thread copies of global variables, for speed.
1427 	 */
1428 	self->ul_queue_fifo = (char)thread_queue_fifo;
1429 	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1430 	self->ul_error_detection = (char)thread_error_detection;
1431 	self->ul_async_safe = (char)thread_async_safe;
1432 	self->ul_door_noreserve = (char)thread_door_noreserve;
1433 	self->ul_misaligned = (char)thread_locks_misaligned;
1434 	self->ul_max_spinners = (uint8_t)thread_max_spinners;
1435 	self->ul_adaptive_spin = thread_adaptive_spin;
1436 	self->ul_queue_spin = thread_queue_spin;
1437 
1438 #if defined(__sparc) && !defined(_LP64)
1439 	if (self->ul_misaligned) {
1440 		/*
1441 		 * Tell the kernel to fix up ldx/stx instructions that
1442 		 * refer to non-8-byte aligned data instead of giving
1443 		 * the process an alignment trap and generating SIGBUS.
1444 		 *
1445 		 * Programs compiled for 32-bit sparc with the Studio SS12
1446 		 * compiler get this done for them automatically (in _init()).
1447 		 * We do it here for the benefit of programs compiled with
1448 		 * other compilers, like gcc.
1449 		 *
1450 		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
1451 		 * environment variable horrible hack to work.
1452 		 */
1453 		extern void _do_fix_align(void);
1454 		_do_fix_align();
1455 	}
1456 #endif
1457 
1458 	/*
1459 	 * When we have initialized the primary link map, inform
1460 	 * the dynamic linker about our interface functions.
1461 	 */
1462 	if (self->ul_primarymap)
1463 		_ld_libc((void *)rtld_funcs);
1464 
1465 	/*
1466 	 * Defer signals until TLS constructors have been called.
1467 	 */
1468 	sigoff(self);
1469 	tls_setup();
1470 	sigon(self);
1471 	if (setmask)
1472 		(void) restore_signals(self);
1473 
1474 	/*
1475 	 * Make private copies of __xpg4 and __xpg6 so libc can test
1476 	 * them after this point without invoking the dynamic linker.
1477 	 */
1478 	libc__xpg4 = __xpg4;
1479 	libc__xpg6 = __xpg6;
1480 
1481 	/* PROBE_SUPPORT begin */
1482 	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1483 		__tnf_probe_notify();
1484 	/* PROBE_SUPPORT end */
1485 
1486 	init_sigev_thread();
1487 	init_aio();
1488 
1489 	/*
1490 	 * We need to reset __threaded dynamically at runtime, so that
1491 	 * __threaded can be bound to __threaded outside libc which may not
1492 	 * have initial value of 1 (without a copy relocation in a.out).
1493 	 */
1494 	__threaded = 1;
1495 }
1496 
1497 #pragma fini(libc_fini)
1498 void
1499 libc_fini()
1500 {
1501 	/*
1502 	 * If we are doing fini processing for the instance of libc
1503 	 * on the first alternate link map (this happens only when
1504 	 * the dynamic linker rejects a bad audit library), then clear
1505 	 * __curthread().  We abandon whatever memory was allocated by
1506 	 * lmalloc() while running on this alternate link-map but we
1507 	 * don't care (and can't find the memory in any case); we just
1508 	 * want to protect the application from this bad audit library.
1509 	 * No fini processing is done by libc in the normal case.
1510 	 */
1511 
1512 	uberdata_t *udp = curthread->ul_uberdata;
1513 
1514 	if (udp->primary_map == 0 && udp == &__uberdata)
1515 		set_curthread(NULL);
1516 }
1517 
1518 /*
1519  * finish_init is called when we are about to become multi-threaded,
1520  * that is, on the first call to thr_create().
1521  */
1522 void
1523 finish_init()
1524 {
1525 	ulwp_t *self = curthread;
1526 	uberdata_t *udp = self->ul_uberdata;
1527 	thr_hash_table_t *htp;
1528 	void *data;
1529 	int i;
1530 
1531 	/*
1532 	 * No locks needed here; we are single-threaded on the first call.
1533 	 * We can be called only after the primary link map has been set up.
1534 	 */
1535 	ASSERT(self->ul_primarymap);
1536 	ASSERT(self == udp->ulwp_one);
1537 	ASSERT(!udp->uberflags.uf_mt);
1538 	ASSERT(udp->hash_size == 1);
1539 
1540 	/*
1541 	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1542 	 */
1543 	update_sched(self);
1544 
1545 	/*
1546 	 * Allocate the queue_head array if not already allocated.
1547 	 */
1548 	if (udp->queue_head == NULL)
1549 		queue_alloc();
1550 
1551 	/*
1552 	 * Now allocate the thread hash table.
1553 	 */
1554 	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1555 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1556 	    == MAP_FAILED)
1557 		thr_panic("cannot allocate thread hash table");
1558 
1559 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1560 	udp->hash_size = HASHTBLSZ;
1561 	udp->hash_mask = HASHTBLSZ - 1;
1562 
1563 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1564 		htp->hash_lock.mutex_flag = LOCK_INITED;
1565 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1566 		htp->hash_cond.cond_magic = COND_MAGIC;
1567 	}
1568 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1569 
1570 	/*
1571 	 * Set up the SIGCANCEL handler for threads cancellation.
1572 	 */
1573 	setup_cancelsig(SIGCANCEL);
1574 
1575 	/*
1576 	 * Arrange to do special things on exit --
1577 	 * - collect queue statistics from all remaining active threads.
1578 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1579 	 * - grab assert_lock to ensure that assertion failures
1580 	 *   and a core dump take precedence over _exit().
1581 	 * (Functions are called in the reverse order of their registration.)
1582 	 */
1583 	(void) _atexit(grab_assert_lock);
1584 #if defined(THREAD_DEBUG)
1585 	(void) _atexit(dump_queue_statistics);
1586 	(void) _atexit(collect_queue_statistics);
1587 #endif
1588 }
1589 
1590 /*
1591  * Used only by postfork1_child(), below.
1592  */
1593 static void
1594 mark_dead_and_buried(ulwp_t *ulwp)
1595 {
1596 	ulwp->ul_dead = 1;
1597 	ulwp->ul_lwpid = (lwpid_t)(-1);
1598 	ulwp->ul_hash = NULL;
1599 	ulwp->ul_ix = -1;
1600 	ulwp->ul_schedctl = NULL;
1601 	ulwp->ul_schedctl_called = NULL;
1602 }
1603 
1604 /*
1605  * This is called from fork1() in the child.
1606  * Reset our data structures to reflect one lwp.
1607  */
1608 void
1609 postfork1_child()
1610 {
1611 	ulwp_t *self = curthread;
1612 	uberdata_t *udp = self->ul_uberdata;
1613 	queue_head_t *qp;
1614 	ulwp_t *next;
1615 	ulwp_t *ulwp;
1616 	int i;
1617 
1618 	/* daemon threads shouldn't call fork1(), but oh well... */
1619 	self->ul_usropts &= ~THR_DAEMON;
1620 	udp->nthreads = 1;
1621 	udp->ndaemons = 0;
1622 	udp->uberflags.uf_mt = 0;
1623 	__libc_threaded = 0;
1624 	for (i = 0; i < udp->hash_size; i++)
1625 		udp->thr_hash_table[i].hash_bucket = NULL;
1626 	self->ul_lwpid = _lwp_self();
1627 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1628 
1629 	/*
1630 	 * Some thread in the parent might have been suspended
1631 	 * while holding udp->callout_lock or udp->ld_lock.
1632 	 * Reinitialize the child's copies.
1633 	 */
1634 	(void) mutex_init(&udp->callout_lock,
1635 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1636 	(void) mutex_init(&udp->ld_lock,
1637 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1638 
1639 	/* no one in the child is on a sleep queue; reinitialize */
1640 	if ((qp = udp->queue_head) != NULL) {
1641 		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
1642 		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
1643 			qp->qh_type = (i < QHASHSIZE)? MX : CV;
1644 			qp->qh_lock.mutex_flag = LOCK_INITED;
1645 			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
1646 			qp->qh_hlist = &qp->qh_def_root;
1647 #if defined(THREAD_DEBUG)
1648 			qp->qh_hlen = 1;
1649 			qp->qh_hmax = 1;
1650 #endif
1651 		}
1652 	}
1653 
1654 	/*
1655 	 * Do post-fork1 processing for subsystems that need it.
1656 	 * We need to do this before unmapping all of the abandoned
1657 	 * threads' stacks, below(), because the post-fork1 actions
1658 	 * might require access to those stacks.
1659 	 */
1660 	postfork1_child_sigev_aio();
1661 	postfork1_child_sigev_mq();
1662 	postfork1_child_sigev_timer();
1663 	postfork1_child_aio();
1664 	/*
1665 	 * The above subsystems use thread pools, so this action
1666 	 * must be performed after those actions.
1667 	 */
1668 	postfork1_child_tpool();
1669 
1670 	/*
1671 	 * All lwps except ourself are gone.  Mark them so.
1672 	 * First mark all of the lwps that have already been freed.
1673 	 * Then mark and free all of the active lwps except ourself.
1674 	 * Since we are single-threaded, no locks are required here.
1675 	 */
1676 	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1677 		mark_dead_and_buried(ulwp);
1678 	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1679 		mark_dead_and_buried(ulwp);
1680 	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1681 		next = ulwp->ul_forw;
1682 		ulwp->ul_forw = ulwp->ul_back = NULL;
1683 		mark_dead_and_buried(ulwp);
1684 		tsd_free(ulwp);
1685 		tls_free(ulwp);
1686 		rwl_free(ulwp);
1687 		heldlock_free(ulwp);
1688 		ulwp_free(ulwp);
1689 	}
1690 	self->ul_forw = self->ul_back = udp->all_lwps = self;
1691 	if (self != udp->ulwp_one)
1692 		mark_dead_and_buried(udp->ulwp_one);
1693 	if ((ulwp = udp->all_zombies) != NULL) {
1694 		ASSERT(udp->nzombies != 0);
1695 		do {
1696 			next = ulwp->ul_forw;
1697 			ulwp->ul_forw = ulwp->ul_back = NULL;
1698 			mark_dead_and_buried(ulwp);
1699 			udp->nzombies--;
1700 			if (ulwp->ul_replace) {
1701 				ulwp->ul_next = NULL;
1702 				if (udp->ulwp_replace_free == NULL) {
1703 					udp->ulwp_replace_free =
1704 					    udp->ulwp_replace_last = ulwp;
1705 				} else {
1706 					udp->ulwp_replace_last->ul_next = ulwp;
1707 					udp->ulwp_replace_last = ulwp;
1708 				}
1709 			}
1710 		} while ((ulwp = next) != udp->all_zombies);
1711 		ASSERT(udp->nzombies == 0);
1712 		udp->all_zombies = NULL;
1713 		udp->nzombies = 0;
1714 	}
1715 	trim_stack_cache(0);
1716 }
1717 
1718 lwpid_t
1719 lwp_self(void)
1720 {
1721 	return (curthread->ul_lwpid);
1722 }
1723 
1724 #pragma weak _ti_thr_self = thr_self
1725 #pragma weak pthread_self = thr_self
1726 thread_t
1727 thr_self()
1728 {
1729 	return (curthread->ul_lwpid);
1730 }
1731 
1732 int
1733 thr_main()
1734 {
1735 	ulwp_t *self = __curthread();
1736 
1737 	return ((self == NULL)? -1 : self->ul_main);
1738 }
1739 
1740 int
1741 _thrp_cancelled(void)
1742 {
1743 	return (curthread->ul_rval == PTHREAD_CANCELED);
1744 }
1745 
1746 int
1747 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1748 {
1749 	stk->ss_sp = (void *)ulwp->ul_stktop;
1750 	stk->ss_size = ulwp->ul_stksiz;
1751 	stk->ss_flags = 0;
1752 	return (0);
1753 }
1754 
1755 #pragma weak _thr_stksegment = thr_stksegment
1756 int
1757 thr_stksegment(stack_t *stk)
1758 {
1759 	return (_thrp_stksegment(curthread, stk));
1760 }
1761 
1762 void
1763 force_continue(ulwp_t *ulwp)
1764 {
1765 #if defined(THREAD_DEBUG)
1766 	ulwp_t *self = curthread;
1767 	uberdata_t *udp = self->ul_uberdata;
1768 #endif
1769 	int error;
1770 	timespec_t ts;
1771 
1772 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1773 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1774 
1775 	for (;;) {
1776 		error = _lwp_continue(ulwp->ul_lwpid);
1777 		if (error != 0 && error != EINTR)
1778 			break;
1779 		error = 0;
1780 		if (ulwp->ul_stopping) {	/* he is stopping himself */
1781 			ts.tv_sec = 0;		/* give him a chance to run */
1782 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1783 			(void) __nanosleep(&ts, NULL);
1784 		}
1785 		if (!ulwp->ul_stopping)		/* he is running now */
1786 			break;			/* so we are done */
1787 		/*
1788 		 * He is marked as being in the process of stopping
1789 		 * himself.  Loop around and continue him again.
1790 		 * He may not have been stopped the first time.
1791 		 */
1792 	}
1793 }
1794 
1795 /*
1796  * Suspend an lwp with lwp_suspend(), then move it to a safe point,
1797  * that is, to a point where ul_critical and ul_rtld are both zero.
1798  * On return, the ulwp_lock() is dropped as with ulwp_unlock().
1799  * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
1800  * If we have to drop link_lock, we store 1 through link_dropped.
1801  * If the lwp exits before it can be suspended, we return ESRCH.
1802  */
1803 int
1804 safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
1805 {
1806 	ulwp_t *self = curthread;
1807 	uberdata_t *udp = self->ul_uberdata;
1808 	cond_t *cvp = ulwp_condvar(ulwp, udp);
1809 	mutex_t *mp = ulwp_mutex(ulwp, udp);
1810 	thread_t tid = ulwp->ul_lwpid;
1811 	int ix = ulwp->ul_ix;
1812 	int error = 0;
1813 
1814 	ASSERT(whystopped == TSTP_REGULAR ||
1815 	    whystopped == TSTP_MUTATOR ||
1816 	    whystopped == TSTP_FORK);
1817 	ASSERT(ulwp != self);
1818 	ASSERT(!ulwp->ul_stop);
1819 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1820 	ASSERT(MUTEX_OWNED(mp, self));
1821 
1822 	if (link_dropped != NULL)
1823 		*link_dropped = 0;
1824 
1825 	/*
1826 	 * We must grab the target's spin lock before suspending it.
1827 	 * See the comments below and in _thrp_suspend() for why.
1828 	 */
1829 	spin_lock_set(&ulwp->ul_spinlock);
1830 	(void) ___lwp_suspend(tid);
1831 	spin_lock_clear(&ulwp->ul_spinlock);
1832 
1833 top:
1834 	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
1835 	    ulwp->ul_stopping) {
1836 		/* thread is already safe */
1837 		ulwp->ul_stop |= whystopped;
1838 	} else {
1839 		/*
1840 		 * Setting ul_pleasestop causes the target thread to stop
1841 		 * itself in _thrp_suspend(), below, after we drop its lock.
1842 		 * We must continue the critical thread before dropping
1843 		 * link_lock because the critical thread may be holding
1844 		 * the queue lock for link_lock.  This is delicate.
1845 		 */
1846 		ulwp->ul_pleasestop |= whystopped;
1847 		force_continue(ulwp);
1848 		if (link_dropped != NULL) {
1849 			*link_dropped = 1;
1850 			lmutex_unlock(&udp->link_lock);
1851 			/* be sure to drop link_lock only once */
1852 			link_dropped = NULL;
1853 		}
1854 
1855 		/*
1856 		 * The thread may disappear by calling thr_exit() so we
1857 		 * cannot rely on the ulwp pointer after dropping the lock.
1858 		 * Instead, we search the hash table to find it again.
1859 		 * When we return, we may find that the thread has been
1860 		 * continued by some other thread.  The suspend/continue
1861 		 * interfaces are prone to such race conditions by design.
1862 		 */
1863 		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
1864 		    (ulwp->ul_pleasestop & whystopped)) {
1865 			(void) __cond_wait(cvp, mp);
1866 			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
1867 			    ulwp != NULL; ulwp = ulwp->ul_hash) {
1868 				if (ulwp->ul_lwpid == tid)
1869 					break;
1870 			}
1871 		}
1872 
1873 		if (ulwp == NULL || ulwp->ul_dead)
1874 			error = ESRCH;
1875 		else {
1876 			/*
1877 			 * Do another lwp_suspend() to make sure we don't
1878 			 * return until the target thread is fully stopped
1879 			 * in the kernel.  Don't apply lwp_suspend() until
1880 			 * we know that the target is not holding any
1881 			 * queue locks, that is, that it has completed
1882 			 * ulwp_unlock(self) and has, or at least is
1883 			 * about to, call lwp_suspend() on itself.  We do
1884 			 * this by grabbing the target's spin lock.
1885 			 */
1886 			ASSERT(ulwp->ul_lwpid == tid);
1887 			spin_lock_set(&ulwp->ul_spinlock);
1888 			(void) ___lwp_suspend(tid);
1889 			spin_lock_clear(&ulwp->ul_spinlock);
1890 			/*
1891 			 * If some other thread did a thr_continue()
1892 			 * on the target thread we have to start over.
1893 			 */
1894 			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
1895 				goto top;
1896 		}
1897 	}
1898 
1899 	(void) cond_broadcast(cvp);
1900 	lmutex_unlock(mp);
1901 	return (error);
1902 }
1903 
1904 int
1905 _thrp_suspend(thread_t tid, uchar_t whystopped)
1906 {
1907 	ulwp_t *self = curthread;
1908 	uberdata_t *udp = self->ul_uberdata;
1909 	ulwp_t *ulwp;
1910 	int error = 0;
1911 
1912 	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
1913 	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);
1914 
1915 	/*
1916 	 * We can't suspend anyone except ourself while
1917 	 * some other thread is performing a fork.
1918 	 * This also allows only one suspension at a time.
1919 	 */
1920 	if (tid != self->ul_lwpid)
1921 		fork_lock_enter();
1922 
1923 	if ((ulwp = find_lwp(tid)) == NULL)
1924 		error = ESRCH;
1925 	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
1926 		ulwp_unlock(ulwp, udp);
1927 		error = EINVAL;
1928 	} else if (ulwp->ul_stop) {	/* already stopped */
1929 		ulwp->ul_stop |= whystopped;
1930 		ulwp_broadcast(ulwp);
1931 		ulwp_unlock(ulwp, udp);
1932 	} else if (ulwp != self) {
1933 		/*
1934 		 * After suspending the other thread, move it out of a
1935 		 * critical section and deal with the schedctl mappings.
1936 		 * safe_suspend() suspends the other thread, calls
1937 		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
1938 		 */
1939 		error = safe_suspend(ulwp, whystopped, NULL);
1940 	} else {
1941 		int schedctl_after_fork = 0;
1942 
1943 		/*
1944 		 * We are suspending ourself.  We must not take a signal
1945 		 * until we return from lwp_suspend() and clear ul_stopping.
1946 		 * This is to guard against siglongjmp().
1947 		 */
1948 		enter_critical(self);
1949 		self->ul_sp = stkptr();
1950 		_flush_windows();	/* sparc */
1951 		self->ul_pleasestop = 0;
1952 		self->ul_stop |= whystopped;
1953 		/*
1954 		 * Grab our spin lock before dropping ulwp_mutex(self).
1955 		 * This prevents the suspending thread from applying
1956 		 * lwp_suspend() to us before we emerge from
1957 		 * lmutex_unlock(mp) and have dropped mp's queue lock.
1958 		 */
1959 		spin_lock_set(&self->ul_spinlock);
1960 		self->ul_stopping = 1;
1961 		ulwp_broadcast(self);
1962 		ulwp_unlock(self, udp);
1963 		/*
1964 		 * From this point until we return from lwp_suspend(),
1965 		 * we must not call any function that might invoke the
1966 		 * dynamic linker, that is, we can only call functions
1967 		 * private to the library.
1968 		 *
1969 		 * Also, this is a nasty race condition for a process
1970 		 * that is undergoing a forkall() operation:
1971 		 * Once we clear our spinlock (below), we are vulnerable
1972 		 * to being suspended by the forkall() thread before
1973 		 * we manage to suspend ourself in ___lwp_suspend().
1974 		 * See safe_suspend() and force_continue().
1975 		 *
1976 		 * To avoid a SIGSEGV due to the disappearance
1977 		 * of the schedctl mappings in the child process,
1978 		 * which can happen in spin_lock_clear() if we
1979 		 * are suspended while we are in the middle of
1980 		 * its call to preempt(), we preemptively clear
1981 		 * our own schedctl pointer before dropping our
1982 		 * spinlock.  We reinstate it, in both the parent
1983 		 * and (if this really is a forkall()) the child.
1984 		 */
1985 		if (whystopped & TSTP_FORK) {
1986 			schedctl_after_fork = 1;
1987 			self->ul_schedctl = NULL;
1988 			self->ul_schedctl_called = &udp->uberflags;
1989 		}
1990 		spin_lock_clear(&self->ul_spinlock);
1991 		(void) ___lwp_suspend(tid);
1992 		/*
1993 		 * Somebody else continued us.
1994 		 * We can't grab ulwp_lock(self)
1995 		 * until after clearing ul_stopping.
1996 		 * force_continue() relies on this.
1997 		 */
1998 		self->ul_stopping = 0;
1999 		self->ul_sp = 0;
2000 		if (schedctl_after_fork) {
2001 			self->ul_schedctl_called = NULL;
2002 			self->ul_schedctl = NULL;
2003 			(void) setup_schedctl();
2004 		}
2005 		ulwp_lock(self, udp);
2006 		ulwp_broadcast(self);
2007 		ulwp_unlock(self, udp);
2008 		exit_critical(self);
2009 	}
2010 
2011 	if (tid != self->ul_lwpid)
2012 		fork_lock_exit();
2013 
2014 	return (error);
2015 }
2016 
2017 /*
2018  * Suspend all lwps other than ourself in preparation for fork.
2019  */
2020 void
2021 suspend_fork()
2022 {
2023 	ulwp_t *self = curthread;
2024 	uberdata_t *udp = self->ul_uberdata;
2025 	ulwp_t *ulwp;
2026 	int link_dropped;
2027 
2028 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2029 top:
2030 	lmutex_lock(&udp->link_lock);
2031 
2032 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2033 		ulwp_lock(ulwp, udp);
2034 		if (ulwp->ul_stop) {	/* already stopped */
2035 			ulwp->ul_stop |= TSTP_FORK;
2036 			ulwp_broadcast(ulwp);
2037 			ulwp_unlock(ulwp, udp);
2038 		} else {
2039 			/*
2040 			 * Move the stopped lwp out of a critical section.
2041 			 */
2042 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2043 			    link_dropped)
2044 				goto top;
2045 		}
2046 	}
2047 
2048 	lmutex_unlock(&udp->link_lock);
2049 }
2050 
2051 void
2052 continue_fork(int child)
2053 {
2054 	ulwp_t *self = curthread;
2055 	uberdata_t *udp = self->ul_uberdata;
2056 	ulwp_t *ulwp;
2057 
2058 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2059 
2060 	/*
2061 	 * Clear the schedctl pointers in the child of forkall().
2062 	 */
2063 	if (child) {
2064 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2065 			ulwp->ul_schedctl_called =
2066 			    ulwp->ul_dead? &udp->uberflags : NULL;
2067 			ulwp->ul_schedctl = NULL;
2068 		}
2069 	}
2070 
2071 	/*
2072 	 * Set all lwps that were stopped for fork() running again.
2073 	 */
2074 	lmutex_lock(&udp->link_lock);
2075 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2076 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2077 		lmutex_lock(mp);
2078 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2079 		ulwp->ul_stop &= ~TSTP_FORK;
2080 		ulwp_broadcast(ulwp);
2081 		if (!ulwp->ul_stop)
2082 			force_continue(ulwp);
2083 		lmutex_unlock(mp);
2084 	}
2085 	lmutex_unlock(&udp->link_lock);
2086 }
2087 
2088 int
2089 _thrp_continue(thread_t tid, uchar_t whystopped)
2090 {
2091 	uberdata_t *udp = curthread->ul_uberdata;
2092 	ulwp_t *ulwp;
2093 	mutex_t *mp;
2094 	int error = 0;
2095 
2096 	ASSERT(whystopped == TSTP_REGULAR ||
2097 	    whystopped == TSTP_MUTATOR);
2098 
2099 	/*
2100 	 * We single-thread the entire thread suspend/continue mechanism.
2101 	 */
2102 	fork_lock_enter();
2103 
2104 	if ((ulwp = find_lwp(tid)) == NULL) {
2105 		fork_lock_exit();
2106 		return (ESRCH);
2107 	}
2108 
2109 	mp = ulwp_mutex(ulwp, udp);
2110 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2111 		error = EINVAL;
2112 	} else if (ulwp->ul_stop & whystopped) {
2113 		ulwp->ul_stop &= ~whystopped;
2114 		ulwp_broadcast(ulwp);
2115 		if (!ulwp->ul_stop) {
2116 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2117 				ulwp->ul_sp = 0;
2118 				ulwp->ul_created = 0;
2119 			}
2120 			force_continue(ulwp);
2121 		}
2122 	}
2123 	lmutex_unlock(mp);
2124 
2125 	fork_lock_exit();
2126 	return (error);
2127 }
2128 
2129 int
2130 thr_suspend(thread_t tid)
2131 {
2132 	return (_thrp_suspend(tid, TSTP_REGULAR));
2133 }
2134 
2135 int
2136 thr_continue(thread_t tid)
2137 {
2138 	return (_thrp_continue(tid, TSTP_REGULAR));
2139 }
2140 
/*
 * Thin wrapper: thr_yield() simply calls yield().
 */
void
thr_yield()
{
	yield();
}
2146 
2147 #pragma weak pthread_kill = thr_kill
2148 #pragma weak _thr_kill = thr_kill
2149 int
2150 thr_kill(thread_t tid, int sig)
2151 {
2152 	if (sig == SIGCANCEL)
2153 		return (EINVAL);
2154 	return (_lwp_kill(tid, sig));
2155 }
2156 
2157 /*
2158  * Exit a critical section, take deferred actions if necessary.
2159  * Called from exit_critical() and from sigon().
2160  */
2161 void
2162 do_exit_critical()
2163 {
2164 	ulwp_t *self = curthread;
2165 	int sig;
2166 
2167 	ASSERT(self->ul_critical == 0);
2168 
2169 	/*
2170 	 * Don't suspend ourself or take a deferred signal while dying
2171 	 * or while executing inside the dynamic linker (ld.so.1).
2172 	 */
2173 	if (self->ul_dead || self->ul_rtld)
2174 		return;
2175 
2176 	while (self->ul_pleasestop ||
2177 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2178 		/*
2179 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2180 		 * by keeping self->ul_critical == 1 here.
2181 		 */
2182 		self->ul_critical++;
2183 		while (self->ul_pleasestop) {
2184 			/*
2185 			 * Guard against suspending ourself while on a sleep
2186 			 * queue.  See the comments in call_user_handler().
2187 			 */
2188 			unsleep_self();
2189 			set_parking_flag(self, 0);
2190 			(void) _thrp_suspend(self->ul_lwpid,
2191 			    self->ul_pleasestop);
2192 		}
2193 		self->ul_critical--;
2194 
2195 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2196 			/*
2197 			 * Clear ul_cursig before proceeding.
2198 			 * This protects us from the dynamic linker's
2199 			 * calls to bind_guard()/bind_clear() in the
2200 			 * event that it is invoked to resolve a symbol
2201 			 * like take_deferred_signal() below.
2202 			 */
2203 			self->ul_cursig = 0;
2204 			take_deferred_signal(sig);
2205 			ASSERT(self->ul_cursig == 0);
2206 		}
2207 	}
2208 	ASSERT(self->ul_critical == 0);
2209 }
2210 
2211 /*
2212  * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
2213  * (ld.so.1) when it has do do something, like resolve a symbol to be called
2214  * by the application or one of its libraries.  _ti_bind_guard() is called
2215  * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2216  * application.  The dynamic linker gets special dispensation from libc to
2217  * run in a critical region (all signals deferred and no thread suspension
2218  * or forking allowed), and to be immune from cancellation for the duration.
2219  */
2220 int
2221 _ti_bind_guard(int flags)
2222 {
2223 	ulwp_t *self = curthread;
2224 	uberdata_t *udp = self->ul_uberdata;
2225 	int bindflag = (flags & THR_FLG_RTLD);
2226 
2227 	if ((self->ul_bindflags & bindflag) == bindflag)
2228 		return (0);
2229 	self->ul_bindflags |= bindflag;
2230 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2231 		sigoff(self);	/* see no signals while holding ld_lock */
2232 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2233 		(void) mutex_lock(&udp->ld_lock);
2234 	}
2235 	enter_critical(self);
2236 	self->ul_save_state = self->ul_cancel_disabled;
2237 	self->ul_cancel_disabled = 1;
2238 	set_cancel_pending_flag(self, 0);
2239 	return (1);
2240 }
2241 
2242 int
2243 _ti_bind_clear(int flags)
2244 {
2245 	ulwp_t *self = curthread;
2246 	uberdata_t *udp = self->ul_uberdata;
2247 	int bindflag = (flags & THR_FLG_RTLD);
2248 
2249 	if ((self->ul_bindflags & bindflag) == 0)
2250 		return (self->ul_bindflags);
2251 	self->ul_bindflags &= ~bindflag;
2252 	self->ul_cancel_disabled = self->ul_save_state;
2253 	set_cancel_pending_flag(self, 0);
2254 	exit_critical(self);
2255 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2256 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2257 			(void) mutex_unlock(&udp->ld_lock);
2258 			self->ul_rtld--;
2259 			sigon(self);	/* reenable signals */
2260 		}
2261 	}
2262 	return (self->ul_bindflags);
2263 }
2264 
2265 /*
2266  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2267  * a critical region in libc.  Return zero if not, else return non-zero.
2268  */
2269 int
2270 _ti_critical(void)
2271 {
2272 	ulwp_t *self = curthread;
2273 	int level = self->ul_critical;
2274 
2275 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2276 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2277 	return (level - 1);
2278 }
2279 
2280 /*
2281  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2282  * it does in the old libthread (see the comments in cond_wait_queue()).
2283  * Also, signals are deferred at thread startup until TLS constructors
2284  * have all been called, at which time _thrp_setup() calls sigon().
2285  *
2286  * _sigoff() and _sigon() are external consolidation-private interfaces to
2287  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2288  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2289  * (librtc.so) to defer signals during its critical sections (not to be
2290  * confused with libc critical sections [see exit_critical() above]).
2291  */
2292 void
2293 _sigoff(void)
2294 {
2295 	ulwp_t *self = curthread;
2296 
2297 	sigoff(self);
2298 }
2299 
2300 void
2301 _sigon(void)
2302 {
2303 	ulwp_t *self = curthread;
2304 
2305 	ASSERT(self->ul_sigdefer > 0);
2306 	sigon(self);
2307 }
2308 
2309 int
2310 thr_getconcurrency()
2311 {
2312 	return (thr_concurrency);
2313 }
2314 
2315 int
2316 pthread_getconcurrency()
2317 {
2318 	return (pthread_concurrency);
2319 }
2320 
2321 int
2322 thr_setconcurrency(int new_level)
2323 {
2324 	uberdata_t *udp = curthread->ul_uberdata;
2325 
2326 	if (new_level < 0)
2327 		return (EINVAL);
2328 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2329 		return (EAGAIN);
2330 	lmutex_lock(&udp->link_lock);
2331 	if (new_level > thr_concurrency)
2332 		thr_concurrency = new_level;
2333 	lmutex_unlock(&udp->link_lock);
2334 	return (0);
2335 }
2336 
2337 int
2338 pthread_setconcurrency(int new_level)
2339 {
2340 	if (new_level < 0)
2341 		return (EINVAL);
2342 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2343 		return (EAGAIN);
2344 	pthread_concurrency = new_level;
2345 	return (0);
2346 }
2347 
2348 size_t
2349 thr_min_stack(void)
2350 {
2351 	return (MINSTACK);
2352 }
2353 
2354 int
2355 __nthreads(void)
2356 {
2357 	return (curthread->ul_uberdata->nthreads);
2358 }
2359 
2360 /*
2361  * XXX
2362  * The remainder of this file implements the private interfaces to java for
2363  * garbage collection.  It is no longer used, at least by java 1.2.
2364  * It can all go away once all old JVMs have disappeared.
2365  */
2366 
2367 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2368 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2369 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2370 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2371 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2372 
2373 /*
2374  * Get the available register state for the target thread.
2375  * Return non-volatile registers: TRS_NONVOLATILE
2376  */
2377 #pragma weak _thr_getstate = thr_getstate
2378 int
2379 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2380 {
2381 	ulwp_t *self = curthread;
2382 	uberdata_t *udp = self->ul_uberdata;
2383 	ulwp_t **ulwpp;
2384 	ulwp_t *ulwp;
2385 	int error = 0;
2386 	int trs_flag = TRS_LWPID;
2387 
2388 	if (tid == 0 || self->ul_lwpid == tid) {
2389 		ulwp = self;
2390 		ulwp_lock(ulwp, udp);
2391 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2392 		ulwp = *ulwpp;
2393 	} else {
2394 		if (flag)
2395 			*flag = TRS_INVALID;
2396 		return (ESRCH);
2397 	}
2398 
2399 	if (ulwp->ul_dead) {
2400 		trs_flag = TRS_INVALID;
2401 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2402 		error = EINVAL;
2403 		trs_flag = TRS_INVALID;
2404 	} else if (ulwp->ul_stop) {
2405 		trs_flag = TRS_NONVOLATILE;
2406 		getgregs(ulwp, rs);
2407 	}
2408 
2409 	if (flag)
2410 		*flag = trs_flag;
2411 	if (lwp)
2412 		*lwp = tid;
2413 	if (ss != NULL)
2414 		(void) _thrp_stksegment(ulwp, ss);
2415 
2416 	ulwp_unlock(ulwp, udp);
2417 	return (error);
2418 }
2419 
2420 /*
2421  * Set the appropriate register state for the target thread.
2422  * This is not used by java.  It exists solely for the MSTC test suite.
2423  */
2424 #pragma weak _thr_setstate = thr_setstate
2425 int
2426 thr_setstate(thread_t tid, int flag, gregset_t rs)
2427 {
2428 	uberdata_t *udp = curthread->ul_uberdata;
2429 	ulwp_t *ulwp;
2430 	int error = 0;
2431 
2432 	if ((ulwp = find_lwp(tid)) == NULL)
2433 		return (ESRCH);
2434 
2435 	if (!ulwp->ul_stop && !suspendedallmutators)
2436 		error = EINVAL;
2437 	else if (rs != NULL) {
2438 		switch (flag) {
2439 		case TRS_NONVOLATILE:
2440 			/* do /proc stuff here? */
2441 			if (ulwp->ul_stop)
2442 				setgregs(ulwp, rs);
2443 			else
2444 				error = EINVAL;
2445 			break;
2446 		case TRS_LWPID:		/* do /proc stuff here? */
2447 		default:
2448 			error = EINVAL;
2449 			break;
2450 		}
2451 	}
2452 
2453 	ulwp_unlock(ulwp, udp);
2454 	return (error);
2455 }
2456 
2457 int
2458 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2459 {
2460 	extern ssize_t __pread(int, void *, size_t, off_t);
2461 	char buf[100];
2462 	int fd;
2463 
2464 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2465 	(void) strcpy(buf, "/proc/self/lwp/");
2466 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2467 	(void) strcat(buf, "/lwpstatus");
2468 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2469 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2470 			if (sp->pr_flags & PR_STOPPED) {
2471 				(void) __close(fd);
2472 				return (0);
2473 			}
2474 			yield();	/* give him a chance to stop */
2475 		}
2476 		(void) __close(fd);
2477 	}
2478 	return (-1);
2479 }
2480 
2481 int
2482 putlwpregs(thread_t tid, prgregset_t prp)
2483 {
2484 	extern ssize_t __writev(int, const struct iovec *, int);
2485 	char buf[100];
2486 	int fd;
2487 	long dstop_sreg[2];
2488 	long run_null[2];
2489 	iovec_t iov[3];
2490 
2491 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2492 	(void) strcpy(buf, "/proc/self/lwp/");
2493 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2494 	(void) strcat(buf, "/lwpctl");
2495 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2496 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2497 		dstop_sreg[1] = PCSREG;		/* set the registers */
2498 		iov[0].iov_base = (caddr_t)dstop_sreg;
2499 		iov[0].iov_len = sizeof (dstop_sreg);
2500 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2501 		iov[1].iov_len = sizeof (prgregset_t);
2502 		run_null[0] = PCRUN;		/* make it runnable again */
2503 		run_null[1] = 0;
2504 		iov[2].iov_base = (caddr_t)run_null;
2505 		iov[2].iov_len = sizeof (run_null);
2506 		if (__writev(fd, iov, 3) >= 0) {
2507 			(void) __close(fd);
2508 			return (0);
2509 		}
2510 		(void) __close(fd);
2511 	}
2512 	return (-1);
2513 }
2514 
2515 static ulong_t
2516 gettsp_slow(thread_t tid)
2517 {
2518 	char buf[100];
2519 	struct lwpstatus status;
2520 
2521 	if (getlwpstatus(tid, &status) != 0) {
2522 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2523 		(void) strcpy(buf, "__gettsp(");
2524 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2525 		(void) strcat(buf, "): can't read lwpstatus");
2526 		thr_panic(buf);
2527 	}
2528 	return (status.pr_reg[R_SP]);
2529 }
2530 
2531 ulong_t
2532 __gettsp(thread_t tid)
2533 {
2534 	uberdata_t *udp = curthread->ul_uberdata;
2535 	ulwp_t *ulwp;
2536 	ulong_t result;
2537 
2538 	if ((ulwp = find_lwp(tid)) == NULL)
2539 		return (0);
2540 
2541 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2542 		ulwp_unlock(ulwp, udp);
2543 		return (result);
2544 	}
2545 
2546 	result = gettsp_slow(tid);
2547 	ulwp_unlock(ulwp, udp);
2548 	return (result);
2549 }
2550 
2551 /*
2552  * This tells java stack walkers how to find the ucontext
2553  * structure passed to signal handlers.
2554  */
2555 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2556 void
2557 thr_sighndlrinfo(void (**func)(), int *funcsize)
2558 {
2559 	*func = &__sighndlr;
2560 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2561 }
2562 
2563 /*
2564  * Mark a thread a mutator or reset a mutator to being a default,
2565  * non-mutator thread.
2566  */
2567 #pragma weak _thr_setmutator = thr_setmutator
2568 int
2569 thr_setmutator(thread_t tid, int enabled)
2570 {
2571 	ulwp_t *self = curthread;
2572 	uberdata_t *udp = self->ul_uberdata;
2573 	ulwp_t *ulwp;
2574 	int error;
2575 	int cancel_state;
2576 
2577 	enabled = enabled? 1 : 0;
2578 top:
2579 	if (tid == 0) {
2580 		ulwp = self;
2581 		ulwp_lock(ulwp, udp);
2582 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2583 		return (ESRCH);
2584 	}
2585 
2586 	/*
2587 	 * The target thread should be the caller itself or a suspended thread.
2588 	 * This prevents the target from also changing its ul_mutator field.
2589 	 */
2590 	error = 0;
2591 	if (ulwp != self && !ulwp->ul_stop && enabled)
2592 		error = EINVAL;
2593 	else if (ulwp->ul_mutator != enabled) {
2594 		lmutex_lock(&mutatorslock);
2595 		if (mutatorsbarrier) {
2596 			ulwp_unlock(ulwp, udp);
2597 			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2598 			    &cancel_state);
2599 			while (mutatorsbarrier)
2600 				(void) cond_wait(&mutatorscv, &mutatorslock);
2601 			(void) pthread_setcancelstate(cancel_state, NULL);
2602 			lmutex_unlock(&mutatorslock);
2603 			goto top;
2604 		}
2605 		ulwp->ul_mutator = enabled;
2606 		lmutex_unlock(&mutatorslock);
2607 	}
2608 
2609 	ulwp_unlock(ulwp, udp);
2610 	return (error);
2611 }
2612 
2613 /*
2614  * Establish a barrier against new mutators.  Any non-mutator trying
2615  * to become a mutator is suspended until the barrier is removed.
2616  */
2617 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2618 void
2619 thr_mutators_barrier(int enabled)
2620 {
2621 	int oldvalue;
2622 	int cancel_state;
2623 
2624 	lmutex_lock(&mutatorslock);
2625 
2626 	/*
2627 	 * Wait if trying to set the barrier while it is already set.
2628 	 */
2629 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2630 	while (mutatorsbarrier && enabled)
2631 		(void) cond_wait(&mutatorscv, &mutatorslock);
2632 	(void) pthread_setcancelstate(cancel_state, NULL);
2633 
2634 	oldvalue = mutatorsbarrier;
2635 	mutatorsbarrier = enabled;
2636 	/*
2637 	 * Wakeup any blocked non-mutators when barrier is removed.
2638 	 */
2639 	if (oldvalue && !enabled)
2640 		(void) cond_broadcast(&mutatorscv);
2641 	lmutex_unlock(&mutatorslock);
2642 }
2643 
2644 /*
2645  * Suspend the set of all mutators except for the caller.  The list
2646  * of actively running threads is searched and only the mutators
2647  * in this list are suspended.  Actively running non-mutators remain
2648  * running.  Any other thread is suspended.
2649  */
2650 #pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2651 int
2652 thr_suspend_allmutators(void)
2653 {
2654 	ulwp_t *self = curthread;
2655 	uberdata_t *udp = self->ul_uberdata;
2656 	ulwp_t *ulwp;
2657 	int link_dropped;
2658 
2659 	/*
2660 	 * We single-thread the entire thread suspend/continue mechanism.
2661 	 */
2662 	fork_lock_enter();
2663 
2664 top:
2665 	lmutex_lock(&udp->link_lock);
2666 
2667 	if (suspendingallmutators || suspendedallmutators) {
2668 		lmutex_unlock(&udp->link_lock);
2669 		fork_lock_exit();
2670 		return (EINVAL);
2671 	}
2672 	suspendingallmutators = 1;
2673 
2674 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2675 		ulwp_lock(ulwp, udp);
2676 		if (!ulwp->ul_mutator) {
2677 			ulwp_unlock(ulwp, udp);
2678 		} else if (ulwp->ul_stop) {	/* already stopped */
2679 			ulwp->ul_stop |= TSTP_MUTATOR;
2680 			ulwp_broadcast(ulwp);
2681 			ulwp_unlock(ulwp, udp);
2682 		} else {
2683 			/*
2684 			 * Move the stopped lwp out of a critical section.
2685 			 */
2686 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2687 			    link_dropped) {
2688 				suspendingallmutators = 0;
2689 				goto top;
2690 			}
2691 		}
2692 	}
2693 
2694 	suspendedallmutators = 1;
2695 	suspendingallmutators = 0;
2696 	lmutex_unlock(&udp->link_lock);
2697 	fork_lock_exit();
2698 	return (0);
2699 }
2700 
2701 /*
2702  * Suspend the target mutator.  The caller is permitted to suspend
2703  * itself.  If a mutator barrier is enabled, the caller will suspend
2704  * itself as though it had been suspended by thr_suspend_allmutators().
2705  * When the barrier is removed, this thread will be resumed.  Any
2706  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2707  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2708  */
2709 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2710 int
2711 thr_suspend_mutator(thread_t tid)
2712 {
2713 	if (tid == 0)
2714 		tid = curthread->ul_lwpid;
2715 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2716 }
2717 
2718 /*
2719  * Resume the set of all suspended mutators.
2720  */
2721 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2722 int
2723 thr_continue_allmutators()
2724 {
2725 	ulwp_t *self = curthread;
2726 	uberdata_t *udp = self->ul_uberdata;
2727 	ulwp_t *ulwp;
2728 
2729 	/*
2730 	 * We single-thread the entire thread suspend/continue mechanism.
2731 	 */
2732 	fork_lock_enter();
2733 
2734 	lmutex_lock(&udp->link_lock);
2735 	if (!suspendedallmutators) {
2736 		lmutex_unlock(&udp->link_lock);
2737 		fork_lock_exit();
2738 		return (EINVAL);
2739 	}
2740 	suspendedallmutators = 0;
2741 
2742 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2743 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2744 		lmutex_lock(mp);
2745 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2746 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2747 			ulwp_broadcast(ulwp);
2748 			if (!ulwp->ul_stop)
2749 				force_continue(ulwp);
2750 		}
2751 		lmutex_unlock(mp);
2752 	}
2753 
2754 	lmutex_unlock(&udp->link_lock);
2755 	fork_lock_exit();
2756 	return (0);
2757 }
2758 
2759 /*
2760  * Resume a suspended mutator.
2761  */
2762 #pragma weak _thr_continue_mutator = thr_continue_mutator
2763 int
2764 thr_continue_mutator(thread_t tid)
2765 {
2766 	return (_thrp_continue(tid, TSTP_MUTATOR));
2767 }
2768 
2769 #pragma weak _thr_wait_mutator = thr_wait_mutator
2770 int
2771 thr_wait_mutator(thread_t tid, int dontwait)
2772 {
2773 	uberdata_t *udp = curthread->ul_uberdata;
2774 	ulwp_t *ulwp;
2775 	int cancel_state;
2776 	int error = 0;
2777 
2778 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2779 top:
2780 	if ((ulwp = find_lwp(tid)) == NULL) {
2781 		(void) pthread_setcancelstate(cancel_state, NULL);
2782 		return (ESRCH);
2783 	}
2784 
2785 	if (!ulwp->ul_mutator)
2786 		error = EINVAL;
2787 	else if (dontwait) {
2788 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2789 			error = EWOULDBLOCK;
2790 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2791 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2792 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2793 
2794 		(void) cond_wait(cvp, mp);
2795 		(void) lmutex_unlock(mp);
2796 		goto top;
2797 	}
2798 
2799 	ulwp_unlock(ulwp, udp);
2800 	(void) pthread_setcancelstate(cancel_state, NULL);
2801 	return (error);
2802 }
2803 
2804 /* PROBE_SUPPORT begin */
2805 
2806 void
2807 thr_probe_setup(void *data)
2808 {
2809 	curthread->ul_tpdp = data;
2810 }
2811 
2812 static void *
2813 _thread_probe_getfunc()
2814 {
2815 	return (curthread->ul_tpdp);
2816 }
2817 
2818 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2819 
2820 /* ARGSUSED */
2821 void
2822 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2823 {
2824 	/* never called */
2825 }
2826 
2827 /* ARGSUSED */
2828 void
2829 _resume_ret(ulwp_t *oldlwp)
2830 {
2831 	/* never called */
2832 }
2833 
2834 /* PROBE_SUPPORT end */
2835