xref: /illumos-gate/usr/src/lib/libc/port/threads/thr.c (revision a92282e44f968185a6bba094d1e5fece2da819cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  * Copyright (c) 2017 by The MathWorks, Inc. All rights reserved.
26  */
27 /*
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 #include "lint.h"
32 #include "thr_uberdata.h"
33 #include <pthread.h>
34 #include <procfs.h>
35 #include <sys/uio.h>
36 #include <ctype.h>
37 #include "libc.h"
38 
39 /*
40  * These symbols should not be exported from libc, but
41  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
42  * Also, some older versions of the Studio compiler/debugger
43  * components reference them.  These need to be fixed, too.
44  */
/* Weak, underscore-prefixed aliases for the corresponding thr_*() symbols. */
#pragma weak _thr_main = thr_main
#pragma weak _thr_create = thr_create
#pragma weak _thr_join = thr_join
#pragma weak _thr_self = thr_self

/*
 * Remove any macro definition of errno and declare the plain
 * global variable of that name for use in this file.
 */
#undef errno
extern int errno;
52 
53 /*
54  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
55  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
56  * system used it illegally (it is a consolidation private symbol).
57  * To accommodate this and possibly other abusers of the symbol,
58  * we make it always equal to 1 now that libthread has been folded
59  * into libc.  The new __libc_threaded symbol is used to indicate
60  * the new meaning, "more than one thread exists".
61  */
int __threaded = 1;		/* always equal to 1 */
int __libc_threaded = 0;	/* set to 1 by _thrp_create() on success */

/*
 * thr_concurrency and pthread_concurrency are not used by the library.
 * They exist solely to hold and return the values set by calls to
 * thr_setconcurrency() and pthread_setconcurrency().
 * Because thr_concurrency is affected by the THR_NEW_LWP flag
 * to thr_create(), thr_concurrency is protected by link_lock.
 */
static	int	thr_concurrency = 1;	/* ++/-- as THR_NEW_LWP threads come/go */
static	int	pthread_concurrency;	/* static storage: starts out as zero */
74 
#define	HASHTBLSZ	1024	/* must be a power of two */
/*
 * Map a thread id to a bucket index in (udp)->thr_hash_table by masking
 * it with (udp)->hash_mask.  Both arguments are parenthesized so that an
 * expression argument such as (a | b) is masked as a whole rather than
 * being re-associated by operator precedence.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
77 
/*
 * Initial allocation, just enough for one lwp.  The static __uberdata
 * initializer points thr_hash_table at this one-entry table.
 * The pragma requests 64-byte alignment for the array.
 */
#pragma align 64(init_hash_table)
thr_hash_table_t init_hash_table[1] = {
	{ DEFAULTMUTEX, DEFAULTCV, NULL },
};

/* Array of Lc_interface entries; the definition lives elsewhere. */
extern const Lc_interface rtld_funcs[];
85 
/*
 * The statically initialized uberdata_t instance.  The initializer is
 * positional, so entries must stay in uberdata_t member order; each one
 * is tagged with the member it initializes.  The hash table starts out
 * as the one-entry init_hash_table (hash_size 1, hash_mask 0);
 * _thrp_create() calls finish_init() while hash_size is still 1.
 *
 * The weak version is known to libc_db and mdb.
 */
#pragma weak _uberdata = __uberdata
uberdata_t __uberdata = {
	{ DEFAULTMUTEX, 0, 0 },	/* link_lock */
	{ RECURSIVEMUTEX, 0, 0 },	/* ld_lock */
	{ RECURSIVEMUTEX, 0, 0 },	/* fork_lock */
	{ RECURSIVEMUTEX, 0, 0 },	/* atfork_lock */
	{ RECURSIVEMUTEX, 0, 0 },	/* callout_lock */
	{ DEFAULTMUTEX, 0, 0 },	/* tdb_hash_lock */
	{ 0, },				/* tdb_hash_lock_stats */
	{ { 0 }, },			/* siguaction[NSIG] */
	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 },
	{ DEFAULTMUTEX, NULL, 0 }},
	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
	{ RECURSIVEMUTEX, NULL },		/* quickexit_root */
	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
	0,			/* primary_map */
	0,			/* bucket_init */
	0,			/* pad[0] */
	0,			/* pad[1] */
	{ 0 },			/* uberflags */
	NULL,			/* queue_head */
	init_hash_table,	/* thr_hash_table */
	1,			/* hash_size: size of the hash table */
	0,			/* hash_mask: hash_size - 1 */
	NULL,			/* ulwp_one */
	NULL,			/* all_lwps */
	NULL,			/* all_zombies */
	0,			/* nthreads */
	0,			/* nzombies */
	0,			/* ndaemons */
	0,			/* pid */
	sigacthandler,		/* sigacthandler */
	NULL,			/* lwp_stacks */
	NULL,			/* lwp_laststack */
	0,			/* nfreestack */
	10,			/* thread_stack_cache */
	NULL,			/* ulwp_freelist */
	NULL,			/* ulwp_lastfree */
	NULL,			/* ulwp_replace_free */
	NULL,			/* ulwp_replace_last */
	NULL,			/* atforklist */
	NULL,			/* robustlocks */
	NULL,			/* robustlist */
	NULL,			/* progname */
	NULL,			/* ub_comm_page */
	NULL,			/* __tdb_bootstrap */
	{			/* tdb */
		NULL,		/* tdb_sync_addr_hash */
		0,		/* tdb_register_count */
		0,		/* tdb_hash_alloc_failed */
		NULL,		/* tdb_sync_addr_free */
		NULL,		/* tdb_sync_addr_last */
		0,		/* tdb_sync_alloc */
		{ 0, 0 },	/* tdb_ev_global_mask */
		tdb_events,	/* tdb_events array */
	},
};
155 
/*
 * Pointer to a pointer to the uberdata; NULL until assigned by code
 * outside this part of the file.
 *
 * The weak version is known to libc_db and mdb.
 */
#pragma weak _tdb_bootstrap = __tdb_bootstrap
uberdata_t **__tdb_bootstrap = NULL;
161 
/*
 * Global thread-library settings and their default values.
 * NOTE(review): the ul_queue_fifo, ul_cond_wait_defer, ul_error_detection,
 * ul_async_safe, ul_door_noreserve and ul_misaligned members that
 * _thrp_create() copies from the creating thread are presumably
 * per-thread copies of the like-named variables here -- confirm against
 * the code that reads these globals, which is not in this part of the file.
 */
int	thread_queue_fifo = 4;
int	thread_queue_dump = 0;	/* tested by collect_queue_statistics() */
int	thread_cond_wait_defer = 0;
int	thread_error_detection = 0;
int	thread_async_safe = 0;
int	thread_stack_cache = 10;	/* same value as __uberdata's initial one */
int	thread_door_noreserve = 0;
int	thread_locks_misaligned = 0;

/* Forward declarations; both functions are defined later in this file. */
static	ulwp_t	*ulwp_alloc(void);
static	void	ulwp_free(ulwp_t *);
173 
174 /*
175  * Insert the lwp into the hash table.
176  */
177 void
178 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
179 {
180 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
181 	udp->thr_hash_table[ix].hash_bucket = ulwp;
182 	ulwp->ul_ix = ix;
183 }
184 
185 void
186 hash_in(ulwp_t *ulwp, uberdata_t *udp)
187 {
188 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
189 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
190 
191 	lmutex_lock(mp);
192 	hash_in_unlocked(ulwp, ix, udp);
193 	lmutex_unlock(mp);
194 }
195 
196 /*
197  * Delete the lwp from the hash table.
198  */
199 void
200 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
201 {
202 	ulwp_t **ulwpp;
203 
204 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
205 	    ulwp != *ulwpp;
206 	    ulwpp = &(*ulwpp)->ul_hash)
207 		;
208 	*ulwpp = ulwp->ul_hash;
209 	ulwp->ul_hash = NULL;
210 	ulwp->ul_ix = -1;
211 }
212 
213 void
214 hash_out(ulwp_t *ulwp, uberdata_t *udp)
215 {
216 	int ix;
217 
218 	if ((ix = ulwp->ul_ix) >= 0) {
219 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
220 
221 		lmutex_lock(mp);
222 		hash_out_unlocked(ulwp, ix, udp);
223 		lmutex_unlock(mp);
224 	}
225 }
226 
227 /*
228  * Retain stack information for thread structures that are being recycled for
229  * new threads.  All other members of the thread structure should be zeroed.
230  */
231 static void
232 ulwp_clean(ulwp_t *ulwp)
233 {
234 	caddr_t stk = ulwp->ul_stk;
235 	size_t mapsiz = ulwp->ul_mapsiz;
236 	size_t guardsize = ulwp->ul_guardsize;
237 	uintptr_t stktop = ulwp->ul_stktop;
238 	size_t stksiz = ulwp->ul_stksiz;
239 
240 	(void) memset(ulwp, 0, sizeof (*ulwp));
241 
242 	ulwp->ul_stk = stk;
243 	ulwp->ul_mapsiz = mapsiz;
244 	ulwp->ul_guardsize = guardsize;
245 	ulwp->ul_stktop = stktop;
246 	ulwp->ul_stksiz = stksiz;
247 }
248 
/* mmap() protection for new thread stacks; 0 until find_stack() sets it */
static int stackprot;
250 
251 /*
252  * Answer the question, "Is the lwp in question really dead?"
253  * We must inquire of the operating system to be really sure
254  * because the lwp may have called lwp_exit() but it has not
255  * yet completed the exit.
256  */
257 static int
258 dead_and_buried(ulwp_t *ulwp)
259 {
260 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
261 		return (1);
262 	if (ulwp->ul_dead && ulwp->ul_detached &&
263 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
264 		ulwp->ul_lwpid = (lwpid_t)(-1);
265 		return (1);
266 	}
267 	return (0);
268 }
269 
270 /*
271  * Attempt to keep the stack cache within the specified cache limit.
272  */
273 static void
274 trim_stack_cache(int cache_limit)
275 {
276 	ulwp_t *self = curthread;
277 	uberdata_t *udp = self->ul_uberdata;
278 	ulwp_t *prev = NULL;
279 	ulwp_t **ulwpp = &udp->lwp_stacks;
280 	ulwp_t *ulwp;
281 
282 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
283 
284 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
285 		if (dead_and_buried(ulwp)) {
286 			*ulwpp = ulwp->ul_next;
287 			if (ulwp == udp->lwp_laststack)
288 				udp->lwp_laststack = prev;
289 			hash_out(ulwp, udp);
290 			udp->nfreestack--;
291 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
292 			/*
293 			 * Now put the free ulwp on the ulwp freelist.
294 			 */
295 			ulwp->ul_mapsiz = 0;
296 			ulwp->ul_next = NULL;
297 			if (udp->ulwp_freelist == NULL)
298 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
299 			else {
300 				udp->ulwp_lastfree->ul_next = ulwp;
301 				udp->ulwp_lastfree = ulwp;
302 			}
303 		} else {
304 			prev = ulwp;
305 			ulwpp = &ulwp->ul_next;
306 		}
307 	}
308 }
309 
310 /*
311  * Find an unused stack of the requested size
312  * or create a new stack of the requested size.
313  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
314  * thr_exit() stores 1 in the ul_dead member.
315  * thr_join() stores -1 in the ul_lwpid member.
316  */
317 static ulwp_t *
318 find_stack(size_t stksize, size_t guardsize)
319 {
320 	static size_t pagesize = 0;
321 
322 	uberdata_t *udp = curthread->ul_uberdata;
323 	size_t mapsize;
324 	ulwp_t *prev;
325 	ulwp_t *ulwp;
326 	ulwp_t **ulwpp;
327 	void *stk;
328 
329 	/*
330 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
331 	 * unless overridden by the system's configuration.
332 	 */
333 	if (stackprot == 0) {	/* do this once */
334 		long lprot = _sysconf(_SC_STACK_PROT);
335 		if (lprot <= 0)
336 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
337 		stackprot = (int)lprot;
338 	}
339 	if (pagesize == 0)	/* do this once */
340 		pagesize = _sysconf(_SC_PAGESIZE);
341 
342 	/*
343 	 * One megabyte stacks by default, but subtract off
344 	 * two pages for the system-created red zones.
345 	 * Round up a non-zero stack size to a pagesize multiple.
346 	 */
347 	if (stksize == 0)
348 		stksize = DEFAULTSTACK - 2 * pagesize;
349 	else
350 		stksize = ((stksize + pagesize - 1) & -pagesize);
351 
352 	/*
353 	 * Round up the mapping size to a multiple of pagesize.
354 	 * Note: mmap() provides at least one page of red zone
355 	 * so we deduct that from the value of guardsize.
356 	 */
357 	if (guardsize != 0)
358 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
359 	mapsize = stksize + guardsize;
360 
361 	lmutex_lock(&udp->link_lock);
362 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
363 	    (ulwp = *ulwpp) != NULL;
364 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
365 		if (ulwp->ul_mapsiz == mapsize &&
366 		    ulwp->ul_guardsize == guardsize &&
367 		    dead_and_buried(ulwp)) {
368 			/*
369 			 * The previous lwp is gone; reuse the stack.
370 			 * Remove the ulwp from the stack list.
371 			 */
372 			*ulwpp = ulwp->ul_next;
373 			ulwp->ul_next = NULL;
374 			if (ulwp == udp->lwp_laststack)
375 				udp->lwp_laststack = prev;
376 			hash_out(ulwp, udp);
377 			udp->nfreestack--;
378 			lmutex_unlock(&udp->link_lock);
379 			ulwp_clean(ulwp);
380 			return (ulwp);
381 		}
382 	}
383 
384 	/*
385 	 * None of the cached stacks matched our mapping size.
386 	 * Reduce the stack cache to get rid of possibly
387 	 * very old stacks that will never be reused.
388 	 */
389 	if (udp->nfreestack > udp->thread_stack_cache)
390 		trim_stack_cache(udp->thread_stack_cache);
391 	else if (udp->nfreestack > 0)
392 		trim_stack_cache(udp->nfreestack - 1);
393 	lmutex_unlock(&udp->link_lock);
394 
395 	/*
396 	 * Create a new stack.
397 	 */
398 	if ((stk = mmap(NULL, mapsize, stackprot,
399 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
400 		/*
401 		 * We have allocated our stack.  Now allocate the ulwp.
402 		 */
403 		ulwp = ulwp_alloc();
404 		if (ulwp == NULL)
405 			(void) munmap(stk, mapsize);
406 		else {
407 			ulwp->ul_stk = stk;
408 			ulwp->ul_mapsiz = mapsize;
409 			ulwp->ul_guardsize = guardsize;
410 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
411 			ulwp->ul_stksiz = stksize;
412 			if (guardsize)	/* protect the extra red zone */
413 				(void) mprotect(stk, guardsize, PROT_NONE);
414 		}
415 	}
416 	return (ulwp);
417 }
418 
419 /*
420  * Get a ulwp_t structure from the free list or allocate a new one.
421  * Such ulwp_t's do not have a stack allocated by the library.
422  */
423 static ulwp_t *
424 ulwp_alloc(void)
425 {
426 	ulwp_t *self = curthread;
427 	uberdata_t *udp = self->ul_uberdata;
428 	size_t tls_size;
429 	ulwp_t *prev;
430 	ulwp_t *ulwp;
431 	ulwp_t **ulwpp;
432 	caddr_t data;
433 
434 	lmutex_lock(&udp->link_lock);
435 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
436 	    (ulwp = *ulwpp) != NULL;
437 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
438 		if (dead_and_buried(ulwp)) {
439 			*ulwpp = ulwp->ul_next;
440 			ulwp->ul_next = NULL;
441 			if (ulwp == udp->ulwp_lastfree)
442 				udp->ulwp_lastfree = prev;
443 			hash_out(ulwp, udp);
444 			lmutex_unlock(&udp->link_lock);
445 			ulwp_clean(ulwp);
446 			return (ulwp);
447 		}
448 	}
449 	lmutex_unlock(&udp->link_lock);
450 
451 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
452 	data = lmalloc(sizeof (*ulwp) + tls_size);
453 	if (data != NULL) {
454 		/* LINTED pointer cast may result in improper alignment */
455 		ulwp = (ulwp_t *)(data + tls_size);
456 	}
457 	return (ulwp);
458 }
459 
460 /*
461  * Free a ulwp structure.
462  * If there is an associated stack, put it on the stack list and
463  * munmap() previously freed stacks up to the residual cache limit.
464  * Else put it on the ulwp free list and never call lfree() on it.
465  */
466 static void
467 ulwp_free(ulwp_t *ulwp)
468 {
469 	uberdata_t *udp = curthread->ul_uberdata;
470 
471 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
472 	ulwp->ul_next = NULL;
473 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
474 		/*EMPTY*/;
475 	else if (ulwp->ul_mapsiz != 0) {
476 		if (udp->lwp_stacks == NULL)
477 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
478 		else {
479 			udp->lwp_laststack->ul_next = ulwp;
480 			udp->lwp_laststack = ulwp;
481 		}
482 		if (++udp->nfreestack > udp->thread_stack_cache)
483 			trim_stack_cache(udp->thread_stack_cache);
484 	} else {
485 		if (udp->ulwp_freelist == NULL)
486 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
487 		else {
488 			udp->ulwp_lastfree->ul_next = ulwp;
489 			udp->ulwp_lastfree = ulwp;
490 		}
491 	}
492 }
493 
494 /*
495  * Find a named lwp and return a pointer to its hash list location.
496  * On success, returns with the hash lock held.
497  */
498 ulwp_t **
499 find_lwpp(thread_t tid)
500 {
501 	uberdata_t *udp = curthread->ul_uberdata;
502 	int ix = TIDHASH(tid, udp);
503 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
504 	ulwp_t *ulwp;
505 	ulwp_t **ulwpp;
506 
507 	if (tid == 0)
508 		return (NULL);
509 
510 	lmutex_lock(mp);
511 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
512 	    (ulwp = *ulwpp) != NULL;
513 	    ulwpp = &ulwp->ul_hash) {
514 		if (ulwp->ul_lwpid == tid)
515 			return (ulwpp);
516 	}
517 	lmutex_unlock(mp);
518 	return (NULL);
519 }
520 
521 /*
522  * Wake up all lwps waiting on this lwp for some reason.
523  */
524 void
525 ulwp_broadcast(ulwp_t *ulwp)
526 {
527 	ulwp_t *self = curthread;
528 	uberdata_t *udp = self->ul_uberdata;
529 
530 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
531 	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
532 }
533 
534 /*
535  * Find a named lwp and return a pointer to it.
536  * Returns with the hash lock held.
537  */
538 ulwp_t *
539 find_lwp(thread_t tid)
540 {
541 	ulwp_t *self = curthread;
542 	uberdata_t *udp = self->ul_uberdata;
543 	ulwp_t *ulwp = NULL;
544 	ulwp_t **ulwpp;
545 
546 	if (self->ul_lwpid == tid) {
547 		ulwp = self;
548 		ulwp_lock(ulwp, udp);
549 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
550 		ulwp = *ulwpp;
551 	}
552 
553 	if (ulwp && ulwp->ul_dead) {
554 		ulwp_unlock(ulwp, udp);
555 		ulwp = NULL;
556 	}
557 
558 	return (ulwp);
559 }
560 
/*
 * Common thread-creation routine; thr_create() is a thin wrapper for it.
 *
 *	stk, stksize	caller-supplied stack and its size; when stk is NULL
 *			a stack is obtained from find_stack() instead
 *	func, arg	start function and its argument, recorded in
 *			ul_startpc and ul_startarg
 *	flags		THR_* flags; THR_DETACHED, THR_DAEMON, THR_NEW_LWP
 *			and THR_SUSPENDED are examined here
 *	new_thread	if non-NULL, receives the id of the new thread
 *	guardsize	passed to find_stack() for library-allocated stacks
 *	name		if non-NULL and non-empty, applied to the new thread
 *			with pthread_setname_np()
 *
 * Returns 0 on success.  Otherwise returns ENOTSUP (primary link map not
 * yet initialized, or caller is a child of vfork()), EINVAL (a stack or
 * non-zero size was given and stksize < MINSTACK), ENOMEM (no stack or
 * ulwp_t available), EFAULT (setup_context() failed for a caller-supplied
 * stack), or the error returned by setup_context() or __lwp_create().
 *
 * The lwp is always created LWP_SUSPENDED.  Unless THR_SUSPENDED was
 * requested, it is continued with _thrp_continue() just before returning.
 */
int
_thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
    long flags, thread_t *new_thread, size_t guardsize, const char *name)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ucontext_t uc;
	uint_t lwp_flags;
	thread_t tid;
	int error;
	ulwp_t *ulwp;

	/*
	 * Enforce the restriction of not creating any threads
	 * until the primary link map has been initialized.
	 * Also, disallow thread creation to a child of vfork().
	 */
	if (!self->ul_primarymap || self->ul_vfork)
		return (ENOTSUP);

	/* hash_size == 1 is the statically initialized state of __uberdata */
	if (udp->hash_size == 1)
		finish_init();

	if ((stk || stksize) && stksize < MINSTACK)
		return (EINVAL);

	if (stk == NULL) {
		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
			return (ENOMEM);
		/* the usable stack excludes the extra guard area */
		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
	} else {
		/* initialize the private stack */
		if ((ulwp = ulwp_alloc()) == NULL)
			return (ENOMEM);
		ulwp->ul_stk = stk;
		ulwp->ul_stktop = (uintptr_t)stk + stksize;
		ulwp->ul_stksiz = stksize;
	}
	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
	ulwp->ul_ix = -1;
	ulwp->ul_errnop = &ulwp->ul_errno;

	/* a daemon thread is always created detached as well */
	lwp_flags = LWP_SUSPENDED;
	if (flags & (THR_DETACHED|THR_DAEMON)) {
		flags |= THR_DETACHED;
		lwp_flags |= LWP_DETACHED;
	}
	if (flags & THR_DAEMON)
		lwp_flags |= LWP_DAEMON;

	/* creating a thread: enforce mt-correctness in mutex_lock() */
	self->ul_async_safe = 1;

	/* per-thread copies of global variables, for speed */
	ulwp->ul_queue_fifo = self->ul_queue_fifo;
	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
	ulwp->ul_error_detection = self->ul_error_detection;
	ulwp->ul_async_safe = self->ul_async_safe;
	ulwp->ul_max_spinners = self->ul_max_spinners;
	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
	ulwp->ul_queue_spin = self->ul_queue_spin;
	ulwp->ul_door_noreserve = self->ul_door_noreserve;
	ulwp->ul_misaligned = self->ul_misaligned;

	/* new thread inherits creating thread's scheduling parameters */
	ulwp->ul_policy = self->ul_policy;
	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
	ulwp->ul_cid = self->ul_cid;
	ulwp->ul_rtclassid = self->ul_rtclassid;

	ulwp->ul_primarymap = self->ul_primarymap;
	ulwp->ul_self = ulwp;
	ulwp->ul_uberdata = udp;

	/* debugger support */
	ulwp->ul_usropts = flags;

#ifdef __sparc
	/*
	 * We cache several instructions in the thread structure for use
	 * by the fasttrap DTrace provider. When changing this, read the
	 * comment in fasttrap.h for the all the other places that must
	 * be changed.
	 */
	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
#endif

	ulwp->ul_startpc = func;
	ulwp->ul_startarg = arg;
	_fpinherit(ulwp);
	/*
	 * Defer signals on the new thread until its TLS constructors
	 * have been called.  _thrp_setup() will call sigon() after
	 * it has called tls_setup().
	 */
	ulwp->ul_sigdefer = 1;

	error = setup_context(&uc, _thrp_setup, ulwp,
	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
	if (error != 0 && stk != NULL)	/* inaccessible stack */
		error = EFAULT;

	/*
	 * Call enter_critical() to avoid being suspended until we
	 * have linked the new thread into the proper lists.
	 * This is necessary because forkall() and fork1() must
	 * suspend all threads and they must see a complete list.
	 */
	enter_critical(self);
	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
	if (error != 0 ||
	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
		exit_critical(self);
		/*
		 * No lwp exists.  An lwpid of -1 makes dead_and_buried()
		 * report the ulwp as gone, so it can be recycled at once.
		 */
		ulwp->ul_lwpid = (lwpid_t)(-1);
		ulwp->ul_dead = 1;
		ulwp->ul_detached = 1;
		lmutex_lock(&udp->link_lock);
		ulwp_free(ulwp);
		lmutex_unlock(&udp->link_lock);
		return (error);
	}
	self->ul_nocancel = 0;	/* cancellation is now possible */
	udp->uberflags.uf_mt = 1;
	if (new_thread)
		*new_thread = tid;
	if (flags & THR_DETACHED)
		ulwp->ul_detached = 1;
	ulwp->ul_lwpid = tid;
	ulwp->ul_stop = TSTP_REGULAR;
	if (flags & THR_SUSPENDED)
		ulwp->ul_created = 1;

	/* link in just before all_lwps, i.e. at the tail of the circle */
	lmutex_lock(&udp->link_lock);
	ulwp->ul_forw = udp->all_lwps;
	ulwp->ul_back = udp->all_lwps->ul_back;
	ulwp->ul_back->ul_forw = ulwp;
	ulwp->ul_forw->ul_back = ulwp;
	hash_in(ulwp, udp);
	udp->nthreads++;
	if (flags & THR_DAEMON)
		udp->ndaemons++;
	if (flags & THR_NEW_LWP)
		thr_concurrency++;
	__libc_threaded = 1;		/* inform stdio */
	lmutex_unlock(&udp->link_lock);

	if (__td_event_report(self, TD_CREATE, udp)) {
		self->ul_td_evbuf.eventnum = TD_CREATE;
		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
		tdb_event(TD_CREATE, udp);
	}

	exit_critical(self);

	/* naming is best-effort; a failure does not fail the create */
	if (name != NULL && name[0] != '\0')
		(void) pthread_setname_np(tid, name);

	if (!(flags & THR_SUSPENDED))
		(void) _thrp_continue(tid, TSTP_REGULAR);

	return (0);
}
726 
727 int
728 thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
729     long flags, thread_t *new_thread)
730 {
731 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0,
732 	    NULL));
733 }
734 
/*
 * A special cancellation cleanup hook for DCE.
 * When cleanuphndlr is not NULL it holds a callback that is
 * invoked before a thread is terminated in thr_exit() as a
 * result of being cancelled.
 */
static void (*cleanuphndlr)(void) = NULL;

/*
 * _pthread_setcleanupinit: install 'func' as the cleanup hook,
 * replacing any previous one.  Always returns 0.
 */
int
_pthread_setcleanupinit(void (*func)(void))
{
	cleanuphndlr = func;

	return (0);
}
752 
753 void
754 _thrp_exit()
755 {
756 	ulwp_t *self = curthread;
757 	uberdata_t *udp = self->ul_uberdata;
758 	ulwp_t *replace = NULL;
759 
760 	if (__td_event_report(self, TD_DEATH, udp)) {
761 		self->ul_td_evbuf.eventnum = TD_DEATH;
762 		tdb_event(TD_DEATH, udp);
763 	}
764 
765 	ASSERT(self->ul_sigdefer != 0);
766 
767 	lmutex_lock(&udp->link_lock);
768 	udp->nthreads--;
769 	if (self->ul_usropts & THR_NEW_LWP)
770 		thr_concurrency--;
771 	if (self->ul_usropts & THR_DAEMON)
772 		udp->ndaemons--;
773 	else if (udp->nthreads == udp->ndaemons) {
774 		/*
775 		 * We are the last non-daemon thread exiting.
776 		 * Exit the process.  We retain our TSD and TLS so
777 		 * that atexit() application functions can use them.
778 		 */
779 		lmutex_unlock(&udp->link_lock);
780 		exit(0);
781 		thr_panic("_thrp_exit(): exit(0) returned");
782 	}
783 	lmutex_unlock(&udp->link_lock);
784 
785 	/*
786 	 * tsd_exit() may call its destructor free(), thus depending on
787 	 * tmem, therefore tmem_exit() needs to be called after tsd_exit()
788 	 * and tls_exit().
789 	 */
790 	tsd_exit();		/* deallocate thread-specific data */
791 	tls_exit();		/* deallocate thread-local storage */
792 	tmem_exit();		/* deallocate tmem allocations */
793 	heldlock_exit();	/* deal with left-over held locks */
794 
795 	/* block all signals to finish exiting */
796 	block_all_signals(self);
797 	/* also prevent ourself from being suspended */
798 	enter_critical(self);
799 	rwl_free(self);
800 	lmutex_lock(&udp->link_lock);
801 	ulwp_free(self);
802 	(void) ulwp_lock(self, udp);
803 
804 	if (self->ul_mapsiz && !self->ul_detached) {
805 		/*
806 		 * We want to free the stack for reuse but must keep
807 		 * the ulwp_t struct for the benefit of thr_join().
808 		 * For this purpose we allocate a replacement ulwp_t.
809 		 */
810 		if ((replace = udp->ulwp_replace_free) == NULL)
811 			replace = lmalloc(REPLACEMENT_SIZE);
812 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
813 			udp->ulwp_replace_last = NULL;
814 	}
815 
816 	if (udp->all_lwps == self)
817 		udp->all_lwps = self->ul_forw;
818 	if (udp->all_lwps == self)
819 		udp->all_lwps = NULL;
820 	else {
821 		self->ul_forw->ul_back = self->ul_back;
822 		self->ul_back->ul_forw = self->ul_forw;
823 	}
824 	self->ul_forw = self->ul_back = NULL;
825 #if defined(THREAD_DEBUG)
826 	/* collect queue lock statistics before marking ourself dead */
827 	record_spin_locks(self);
828 #endif
829 	self->ul_dead = 1;
830 	self->ul_pleasestop = 0;
831 	if (replace != NULL) {
832 		int ix = self->ul_ix;		/* the hash index */
833 		(void) memcpy(replace, self, REPLACEMENT_SIZE);
834 		replace->ul_self = replace;
835 		replace->ul_next = NULL;	/* clone not on stack list */
836 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
837 		replace->ul_replace = 1;	/* requires clone to be freed */
838 		hash_out_unlocked(self, ix, udp);
839 		hash_in_unlocked(replace, ix, udp);
840 		ASSERT(!(self->ul_detached));
841 		self->ul_detached = 1;		/* this frees the stack */
842 		self->ul_schedctl = NULL;
843 		self->ul_schedctl_called = &udp->uberflags;
844 		set_curthread(self = replace);
845 		/*
846 		 * Having just changed the address of curthread, we
847 		 * must reset the ownership of the locks we hold so
848 		 * that assertions will not fire when we release them.
849 		 */
850 		udp->link_lock.mutex_owner = (uintptr_t)self;
851 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
852 		/*
853 		 * NOTE:
854 		 * On i386, %gs still references the original, not the
855 		 * replacement, ulwp structure.  Fetching the replacement
856 		 * curthread pointer via %gs:0 works correctly since the
857 		 * original ulwp structure will not be reallocated until
858 		 * this lwp has completed its lwp_exit() system call (see
859 		 * dead_and_buried()), but from here on out, we must make
860 		 * no references to %gs:<offset> other than %gs:0.
861 		 */
862 	}
863 	/*
864 	 * Put non-detached terminated threads in the all_zombies list.
865 	 */
866 	if (!self->ul_detached) {
867 		udp->nzombies++;
868 		if (udp->all_zombies == NULL) {
869 			ASSERT(udp->nzombies == 1);
870 			udp->all_zombies = self->ul_forw = self->ul_back = self;
871 		} else {
872 			self->ul_forw = udp->all_zombies;
873 			self->ul_back = udp->all_zombies->ul_back;
874 			self->ul_back->ul_forw = self;
875 			self->ul_forw->ul_back = self;
876 		}
877 	}
878 	/*
879 	 * Notify everyone waiting for this thread.
880 	 */
881 	ulwp_broadcast(self);
882 	(void) ulwp_unlock(self, udp);
883 	/*
884 	 * Prevent any more references to the schedctl data.
885 	 * We are exiting and continue_fork() may not find us.
886 	 * Do this just before dropping link_lock, since fork
887 	 * serializes on link_lock.
888 	 */
889 	self->ul_schedctl = NULL;
890 	self->ul_schedctl_called = &udp->uberflags;
891 	lmutex_unlock(&udp->link_lock);
892 
893 	ASSERT(self->ul_critical == 1);
894 	ASSERT(self->ul_preempt == 0);
895 	_lwp_terminate();	/* never returns */
896 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
897 }
898 
#if defined(THREAD_DEBUG)
/*
 * Debug-only (THREAD_DEBUG) helper.  When thread_queue_dump is set,
 * call record_spin_locks() on every lwp on the circular all_lwps
 * list, holding link_lock for the duration of the walk.
 */
void
collect_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;

	if (thread_queue_dump) {
		lmutex_lock(&udp->link_lock);
		if ((ulwp = udp->all_lwps) != NULL) {
			/* circular list: stop on arriving back at the head */
			do {
				record_spin_locks(ulwp);
			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
		}
		lmutex_unlock(&udp->link_lock);
	}
}
#endif
917 
/*
 * Common code for thr_exit() and _thrp_terminate().
 *
 *	status	the thread's exit value, stored in ul_rval
 *	unwind	if non-zero, ul_unwind is set before _thrp_unwind()
 *		is called
 *
 * Disables cancellation, runs the DCE cleanup hook if the thread was
 * cancelled (a cancel was pending and status is PTHREAD_CANCELED),
 * masks and defers signals, and hands off to _thrp_unwind().
 * Does not return.
 */
static void __NORETURN
_thrp_exit_common(void *status, int unwind)
{
	ulwp_t *self = curthread;
	/* must be sampled now; ul_cancel_pending is cleared just below */
	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);

	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);

	/*
	 * Disable cancellation and call the special DCE cancellation
	 * cleanup hook if it is enabled.  Do nothing else before calling
	 * the DCE cancellation cleanup hook; it may call longjmp() and
	 * never return here.
	 */
	self->ul_cancel_disabled = 1;
	self->ul_cancel_async = 0;
	self->ul_save_async = 0;
	self->ul_cancelable = 0;
	self->ul_cancel_pending = 0;
	set_cancel_pending_flag(self, 1);
	if (cancelled && cleanuphndlr != NULL)
		(*cleanuphndlr)();

	/*
	 * Block application signals while we are exiting.
	 * We call out to C++, TSD, and TLS destructors while exiting
	 * and these are application-defined, so we cannot be assured
	 * that they won't reset the signal mask.  We use sigoff() to
	 * defer any signals that may be received as a result of this
	 * bad behavior.  Such signals will be lost to the process
	 * when the thread finishes exiting.
	 */
	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
	sigoff(self);

	self->ul_rval = status;

	/*
	 * If thr_exit is being called from the places where
	 * C++ destructors are to be called such as cancellation
	 * points, then set this flag. It is checked in _t_cancel()
	 * to decide whether _ex_unwind() is to be called or not.
	 */
	if (unwind)
		self->ul_unwind = 1;

	/*
	 * _thrp_unwind() will eventually call _thrp_exit().
	 * It never returns.
	 */
	_thrp_unwind(NULL);
	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");

	for (;;)	/* to shut the compiler up about __NORETURN */
		continue;
}
974 
/*
 * Called when a thread returns from its start function; 'status'
 * is the thread's exit value.  We are at the top of the stack,
 * so the exit is requested without unwinding.
 */
void
_thrp_terminate(void *status)
{
	const int unwind = 0;

	_thrp_exit_common(status, unwind);
}
984 
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
/*
 * Terminate the calling thread with exit value 'status'.
 * Unlike _thrp_terminate(), the exit is requested with unwinding.
 */
void
thr_exit(void *status)
{
	const int unwind = 1;

	_thrp_exit_common(status, unwind);
}
992 
/*
 * Common code for joining with a thread; thr_join() is a wrapper for it.
 *
 *	tid		the thread to wait for, handed to the lwp wait call
 *	departed	if non-NULL, receives the id of the lwp that was
 *			waited for
 *	status		if non-NULL, receives the departed thread's ul_rval,
 *			or NULL for an lwp the library does not know about
 *	do_cancel	non-zero: wait with lwp_wait();
 *			zero: wait with __lwp_wait(), retrying on EINTR
 *
 * Returns 0 on success, else the error from the wait call.
 */
int
_thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
{
	uberdata_t *udp = curthread->ul_uberdata;
	mutex_t *mp;
	void *rval;
	thread_t found;
	ulwp_t *ulwp;
	ulwp_t **ulwpp;
	int replace;
	int error;

	if (do_cancel)
		error = lwp_wait(tid, &found);
	else {
		while ((error = __lwp_wait(tid, &found)) == EINTR)
			;
	}
	if (error)
		return (error);

	/*
	 * We must hold link_lock to avoid a race condition with find_stack().
	 */
	lmutex_lock(&udp->link_lock);
	if ((ulwpp = find_lwpp(found)) == NULL) {
		/*
		 * lwp_wait() found an lwp that the library doesn't know
		 * about.  It must have been created with _lwp_create().
		 * Just return its lwpid; we can't know its status.
		 */
		lmutex_unlock(&udp->link_lock);
		rval = NULL;
	} else {
		/*
		 * Remove ulwp from the hash table.
		 * find_lwpp() returned with the hash lock held.
		 */
		ulwp = *ulwpp;
		*ulwpp = ulwp->ul_hash;
		ulwp->ul_hash = NULL;
		/*
		 * Remove ulwp from all_zombies list.
		 */
		ASSERT(udp->nzombies >= 1);
		if (udp->all_zombies == ulwp)
			udp->all_zombies = ulwp->ul_forw;
		if (udp->all_zombies == ulwp)
			udp->all_zombies = NULL;
		else {
			ulwp->ul_forw->ul_back = ulwp->ul_back;
			ulwp->ul_back->ul_forw = ulwp->ul_forw;
		}
		ulwp->ul_forw = ulwp->ul_back = NULL;
		udp->nzombies--;
		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
		/*
		 * We can't call ulwp_unlock(ulwp) after we set
		 * ulwp->ul_ix = -1 so we have to get a pointer to the
		 * ulwp's hash table mutex now in order to unlock it below.
		 */
		mp = ulwp_mutex(ulwp, udp);
		/* an lwpid of -1 makes dead_and_buried() report it as gone */
		ulwp->ul_lwpid = (lwpid_t)(-1);
		ulwp->ul_ix = -1;
		rval = ulwp->ul_rval;
		replace = ulwp->ul_replace;
		lmutex_unlock(mp);
		if (replace) {
			/* a replacement ulwp_t goes back on its own free list */
			ulwp->ul_next = NULL;
			if (udp->ulwp_replace_free == NULL)
				udp->ulwp_replace_free =
				    udp->ulwp_replace_last = ulwp;
			else {
				udp->ulwp_replace_last->ul_next = ulwp;
				udp->ulwp_replace_last = ulwp;
			}
		}
		lmutex_unlock(&udp->link_lock);
	}

	if (departed != NULL)
		*departed = found;
	if (status != NULL)
		*status = rval;
	return (0);
}
1079 
1080 int
1081 thr_join(thread_t tid, thread_t *departed, void **status)
1082 {
1083 	int error = _thrp_join(tid, departed, status, 1);
1084 	return ((error == EINVAL)? ESRCH : error);
1085 }
1086 
1087 /*
1088  * pthread_join() differs from Solaris thr_join():
1089  * It does not return the departed thread's id
1090  * and hence does not have a "departed" argument.
1091  * It returns EINVAL if tid refers to a detached thread.
1092  */
1093 #pragma weak _pthread_join = pthread_join
1094 int
1095 pthread_join(pthread_t tid, void **status)
1096 {
1097 	return ((tid == 0)? ESRCH : _thrp_join(tid, NULL, status, 1));
1098 }
1099 
1100 int
1101 pthread_detach(pthread_t tid)
1102 {
1103 	uberdata_t *udp = curthread->ul_uberdata;
1104 	ulwp_t *ulwp;
1105 	ulwp_t **ulwpp;
1106 	int error = 0;
1107 
1108 	if ((ulwpp = find_lwpp(tid)) == NULL)
1109 		return (ESRCH);
1110 	ulwp = *ulwpp;
1111 
1112 	if (ulwp->ul_dead) {
1113 		ulwp_unlock(ulwp, udp);
1114 		error = _thrp_join(tid, NULL, NULL, 0);
1115 	} else {
1116 		error = __lwp_detach(tid);
1117 		ulwp->ul_detached = 1;
1118 		ulwp->ul_usropts |= THR_DETACHED;
1119 		ulwp_unlock(ulwp, udp);
1120 	}
1121 	return (error);
1122 }
1123 
/*
 * Test whether the environment string 'ev' consists of the name
 * 'match' followed immediately by '='.
 *
 * Returns a pointer to the first character after the '=' (the value)
 * on a match, or NULL if the name does not match exactly.
 */
static const char *
ematch(const char *ev, const char *match)
{
	int c;

	while ((c = *match++) != '\0') {
		if (*ev++ != c)
			return (NULL);
	}
	if (*ev++ != '=')
		return (NULL);
	return (ev);
}

/*
 * Parse a "NAME=decimal" environment string.
 *
 *	ev	the environment string to examine
 *	match	the variable name that ev must carry
 *	limit	the largest value returned; larger values are clamped
 *
 * Returns -1 if the name does not match or the value contains a
 * non-digit character; otherwise the value, clamped to 'limit'.
 * An empty value ("NAME=") yields 0.
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *ename;

	if ((ename = ematch(ev, match)) != NULL) {
		int c;
		/*
		 * Convert each byte through unsigned char before handing
		 * it to isdigit(): passing a negative value (other than
		 * EOF) to a <ctype.h> function is undefined behavior, and
		 * plain char may be signed.
		 */
		for (val = 0; (c = (unsigned char)*ename) != '\0'; ename++) {
			if (!isdigit(c)) {
				val = -1;
				break;
			}
			val = val * 10 + (c - '0');
			/* clamp as we go; this also prevents int overflow */
			if (val > limit) {
				val = limit;
				break;
			}
		}
	}
	return (val);
}
1160 
1161 static void
1162 etest(const char *ev)
1163 {
1164 	int value;
1165 
1166 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1167 		thread_queue_spin = value;
1168 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1169 		thread_adaptive_spin = value;
1170 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1171 		thread_max_spinners = value;
1172 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1173 		thread_queue_fifo = value;
1174 #if defined(THREAD_DEBUG)
1175 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1176 		thread_queue_verify = value;
1177 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1178 		thread_queue_dump = value;
1179 #endif
1180 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1181 		thread_stack_cache = value;
1182 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1183 		thread_cond_wait_defer = value;
1184 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1185 		thread_error_detection = value;
1186 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1187 		thread_async_safe = value;
1188 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1189 		thread_door_noreserve = value;
1190 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1191 		thread_locks_misaligned = value;
1192 }
1193 
/*
 * Walk the environment and hand every variable whose name starts
 * with "_THREAD_" to etest(), with the prefix removed.
 * The older "LIBTHREAD_" prefix is accepted as well, for
 * compatibility with the past.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (entry = *envp; entry != NULL; entry = *++envp) {
		/* cheap first-character test before the full compare */
		switch (*entry) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1217 
/* PROBE_SUPPORT begin */
/*
 * Weak reference: libc_init() compares this symbol against NULL
 * before calling it, so it need not be defined anywhere.
 */
#pragma weak __tnf_probe_notify
extern void __tnf_probe_notify(void);
/* PROBE_SUPPORT end */

/* same as atexit() but private to the library */
extern int _atexit(void (*)(void));

/* same as _cleanup() but private to the library */
extern void __cleanup(void);

/* atfork() setup; libc_init() calls this for every link map */
extern void atfork_init(void);

#ifdef __amd64
/*
 * Called first thing in libc_init() to gather cache layout
 * information for the optimized string and memory functions.
 */
extern void __proc64id(void);
#endif
1234 
1235 static void
1236 init_auxv_data(uberdata_t *udp)
1237 {
1238 	Dl_argsinfo_t args;
1239 
1240 	udp->ub_comm_page = NULL;
1241 	if (dlinfo(RTLD_SELF, RTLD_DI_ARGSINFO, &args) < 0)
1242 		return;
1243 
1244 	while (args.dla_auxv->a_type != AT_NULL) {
1245 		if (args.dla_auxv->a_type == AT_SUN_COMMPAGE) {
1246 			udp->ub_comm_page = args.dla_auxv->a_un.a_ptr;
1247 		}
1248 		args.dla_auxv++;
1249 	}
1250 }
1251 
/*
 * libc_init() is called by ld.so.1 for library initialization.
 * We perform minimal initialization; enough to work with the main thread.
 *
 * Outline:
 *   1. Per-link-map setup that every instance of libc needs
 *      (__cleanup() registration, aux vector scan).
 *   2. If a usable curthread already exists (set up by another libc
 *      instance), do the remaining per-link-map setup and return.
 *   3. Otherwise allocate and populate the ulwp_t for the main thread,
 *      inheriting state from the old one if there was one, and
 *      establish it as curthread.
 *   4. Finish initialization that requires curthread: stack bounds,
 *      environment tunables, TLS setup, signal mask, and so on.
 */
void
libc_init(void)
{
	uberdata_t *udp = &__uberdata;
	ulwp_t *oldself = __curthread();
	ucontext_t uc;
	ulwp_t *self;
	struct rlimit rl;
	caddr_t data;
	size_t tls_size;
	int setmask;

	/*
	 * For the initial stage of initialization, we must be careful
	 * not to call any function that could possibly call _cerror().
	 * For this purpose, we call only the raw system call wrappers.
	 */

#ifdef __amd64
	/*
	 * Gather information about cache layouts for optimized
	 * AMD and Intel assembler strfoo() and memfoo() functions.
	 */
	__proc64id();
#endif

	/*
	 * Every libc, regardless of which link map, must register __cleanup().
	 */
	(void) _atexit(__cleanup);

	/*
	 * Every libc, regardless of link map, needs to go through and check
	 * its aux vectors.  Doing so will indicate whether or not this has
	 * been given a comm page (to optimize certain system actions).
	 */
	init_auxv_data(udp);

	/*
	 * We keep our uberdata on one of (a) the first alternate link map
	 * or (b) the primary link map.  We switch to the primary link map
	 * and stay there once we see it.  All intermediate link maps are
	 * subject to being unloaded at any time.
	 */
	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
		/*
		 * Each link map has its own copy of the stack protector guard
		 * and must always be initialized.
		 */
		ssp_init();
		mutex_setup();
		atfork_init();	/* every link map needs atfork() processing */
		init_progname();
		return;
	}

	/*
	 * To establish the main stack information, we have to get our context.
	 * This is also convenient to use for getting our signal mask.
	 */
	uc.uc_flags = UC_ALL;
	(void) __getcontext(&uc);
	ASSERT(uc.uc_link == NULL);

	/*
	 * Allocate the main thread's ulwp_t together with its static TLS.
	 * The TLS block occupies the front of the allocation and the
	 * ulwp_t follows it.
	 */
	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
	ASSERT(primary_link_map || tls_size == 0);
	data = lmalloc(sizeof (ulwp_t) + tls_size);
	if (data == NULL)
		thr_panic("cannot allocate thread structure for main thread");
	/* LINTED pointer cast may result in improper alignment */
	self = (ulwp_t *)(data + tls_size);
	init_hash_table[0].hash_bucket = self;

	self->ul_sigmask = uc.uc_sigmask;
	delete_reserved_signals(&self->ul_sigmask);
	/*
	 * Are the old and new sets different?
	 * (This can happen if we are currently blocking SIGCANCEL.)
	 * If so, we must explicitly set our signal mask, below.
	 */
	setmask =
	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));

#ifdef __sparc
	/*
	 * We cache several instructions in the thread structure for use
	 * by the fasttrap DTrace provider. When changing this, read the
	 * comment in fasttrap.h for all the other places that must
	 * be changed.
	 */
	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
#endif

	/*
	 * The top of the main stack comes from our context; its size is
	 * taken to be the current (soft) RLIMIT_STACK limit.
	 */
	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
	(void) getrlimit(RLIMIT_STACK, &rl);
	self->ul_stksiz = rl.rlim_cur;
	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);

	/* the main thread starts out as the only member of its list */
	self->ul_forw = self->ul_back = self;
	self->ul_hash = NULL;
	self->ul_ix = 0;
	self->ul_lwpid = 1; /* _lwp_self() */
	self->ul_main = 1;
	self->ul_self = self;
	self->ul_policy = -1;		/* initialize only when needed */
	self->ul_pri = 0;
	self->ul_cid = 0;
	self->ul_rtclassid = -1;
	self->ul_uberdata = udp;
	if (oldself != NULL) {
		int i;

		/*
		 * We are taking over from a ulwp_t that was set up by
		 * another instance of libc; inherit its thread-specific
		 * data, TLS and uberdata.
		 */
		ASSERT(primary_link_map);
		ASSERT(oldself->ul_main == 1);
		self->ul_stsd = oldself->ul_stsd;
		for (i = 0; i < TSD_NFAST; i++)
			self->ul_ftsd[i] = oldself->ul_ftsd[i];
		self->ul_tls = oldself->ul_tls;
		/*
		 * Retrieve all pointers to uberdata allocated
		 * while running on previous link maps.
		 * We would like to do a structure assignment here, but
		 * gcc turns structure assignments into calls to memcpy(),
		 * a function exported from libc.  We can't call any such
		 * external functions until we establish curthread, below,
		 * so we just call our private version of memcpy().
		 */
		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
		/*
		 * These items point to global data on the primary link map.
		 */
		udp->thr_hash_table = init_hash_table;
		udp->sigacthandler = sigacthandler;
		udp->tdb.tdb_events = tdb_events;
		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
		ASSERT(udp->lwp_stacks == NULL);
		ASSERT(udp->ulwp_freelist == NULL);
		ASSERT(udp->ulwp_replace_free == NULL);
		ASSERT(udp->hash_size == 1);
	}
	udp->all_lwps = self;
	udp->ulwp_one = self;
	udp->pid = getpid();
	udp->nthreads = 1;
	/*
	 * In every link map, tdb_bootstrap points to the same piece of
	 * allocated memory.  When the primary link map is initialized,
	 * the allocated memory is assigned a pointer to the one true
	 * uberdata.  This allows libc_db to initialize itself regardless
	 * of which instance of libc it finds in the address space.
	 */
	if (udp->tdb_bootstrap == NULL)
		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
	__tdb_bootstrap = udp->tdb_bootstrap;
	if (primary_link_map) {
		self->ul_primarymap = 1;
		udp->primary_map = 1;
		*udp->tdb_bootstrap = udp;
	}
	/*
	 * Cancellation can't happen until:
	 *	pthread_cancel() is called
	 * or:
	 *	another thread is created
	 * For now, as a single-threaded process, set the flag that tells
	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
	 */
	self->ul_nocancel = 1;

#if defined(__amd64)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
#elif defined(__i386)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
#endif	/* __i386 || __amd64 */
	set_curthread(self);		/* redundant on i386 */
	/*
	 * Now curthread is established and it is safe to call any
	 * function in libc except one that uses thread-local storage.
	 */
	self->ul_errnop = &errno;
	if (oldself != NULL) {
		/* tls_size was zero when oldself was allocated */
		lfree(oldself, sizeof (ulwp_t));
	}
	ssp_init();
	mutex_setup();
	atfork_init();
	signal_init();

	/*
	 * If the stack is unlimited, we set the size to zero to disable
	 * stack checking.
	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
	 */
	if (self->ul_stksiz == RLIM_INFINITY) {
		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
		self->ul_ustack.ss_size = 0;
	} else {
		self->ul_ustack.ss_sp = self->ul_stk;
		self->ul_ustack.ss_size = self->ul_stksiz;
	}
	self->ul_ustack.ss_flags = 0;
	(void) setustack(&self->ul_ustack);

	/*
	 * Get the variables that affect thread behavior from the environment.
	 */
	set_thread_vars();
	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
	udp->thread_stack_cache = thread_stack_cache;

	/*
	 * Make per-thread copies of global variables, for speed.
	 */
	self->ul_queue_fifo = (char)thread_queue_fifo;
	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
	self->ul_error_detection = (char)thread_error_detection;
	self->ul_async_safe = (char)thread_async_safe;
	self->ul_door_noreserve = (char)thread_door_noreserve;
	self->ul_misaligned = (char)thread_locks_misaligned;
	self->ul_max_spinners = (uint8_t)thread_max_spinners;
	self->ul_adaptive_spin = thread_adaptive_spin;
	self->ul_queue_spin = thread_queue_spin;

#if defined(__sparc) && !defined(_LP64)
	if (self->ul_misaligned) {
		/*
		 * Tell the kernel to fix up ldx/stx instructions that
		 * refer to non-8-byte aligned data instead of giving
		 * the process an alignment trap and generating SIGBUS.
		 *
		 * Programs compiled for 32-bit sparc with the Studio SS12
		 * compiler get this done for them automatically (in _init()).
		 * We do it here for the benefit of programs compiled with
		 * other compilers, like gcc.
		 *
		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
		 * environment variable horrible hack to work.
		 */
		extern void _do_fix_align(void);
		_do_fix_align();
	}
#endif

	/*
	 * When we have initialized the primary link map, inform
	 * the dynamic linker about our interface functions.
	 * Set up our pointer to the program name.
	 */
	if (self->ul_primarymap)
		_ld_libc((void *)rtld_funcs);
	init_progname();

	/*
	 * Defer signals until TLS constructors have been called.
	 */
	sigoff(self);
	tls_setup();
	sigon(self);
	/* apply the adjusted signal mask if it differs from the inherited one */
	if (setmask)
		(void) restore_signals(self);

	/*
	 * Make private copies of __xpg4 and __xpg6 so libc can test
	 * them after this point without invoking the dynamic linker.
	 */
	libc__xpg4 = __xpg4;
	libc__xpg6 = __xpg6;

	/* PROBE_SUPPORT begin */
	if (self->ul_primarymap && __tnf_probe_notify != NULL)
		__tnf_probe_notify();
	/* PROBE_SUPPORT end */

	init_sigev_thread();
	init_aio();

	/*
	 * We need to reset __threaded dynamically at runtime, so that
	 * __threaded can be bound to __threaded outside libc which may not
	 * have initial value of 1 (without a copy relocation in a.out).
	 */
	__threaded = 1;
}
1547 
1548 #pragma fini(libc_fini)
1549 void
1550 libc_fini()
1551 {
1552 	/*
1553 	 * If we are doing fini processing for the instance of libc
1554 	 * on the first alternate link map (this happens only when
1555 	 * the dynamic linker rejects a bad audit library), then clear
1556 	 * __curthread().  We abandon whatever memory was allocated by
1557 	 * lmalloc() while running on this alternate link-map but we
1558 	 * don't care (and can't find the memory in any case); we just
1559 	 * want to protect the application from this bad audit library.
1560 	 * No fini processing is done by libc in the normal case.
1561 	 */
1562 
1563 	uberdata_t *udp = curthread->ul_uberdata;
1564 
1565 	if (udp->primary_map == 0 && udp == &__uberdata)
1566 		set_curthread(NULL);
1567 }
1568 
1569 /*
1570  * finish_init is called when we are about to become multi-threaded,
1571  * that is, on the first call to thr_create().
1572  */
1573 void
1574 finish_init()
1575 {
1576 	ulwp_t *self = curthread;
1577 	uberdata_t *udp = self->ul_uberdata;
1578 	thr_hash_table_t *htp;
1579 	void *data;
1580 	int i;
1581 
1582 	/*
1583 	 * No locks needed here; we are single-threaded on the first call.
1584 	 * We can be called only after the primary link map has been set up.
1585 	 */
1586 	ASSERT(self->ul_primarymap);
1587 	ASSERT(self == udp->ulwp_one);
1588 	ASSERT(!udp->uberflags.uf_mt);
1589 	ASSERT(udp->hash_size == 1);
1590 
1591 	/*
1592 	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1593 	 */
1594 	update_sched(self);
1595 
1596 	/*
1597 	 * Allocate the queue_head array if not already allocated.
1598 	 */
1599 	if (udp->queue_head == NULL)
1600 		queue_alloc();
1601 
1602 	/*
1603 	 * Now allocate the thread hash table.
1604 	 */
1605 	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1606 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1607 	    == MAP_FAILED)
1608 		thr_panic("cannot allocate thread hash table");
1609 
1610 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1611 	udp->hash_size = HASHTBLSZ;
1612 	udp->hash_mask = HASHTBLSZ - 1;
1613 
1614 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1615 		htp->hash_lock.mutex_flag = LOCK_INITED;
1616 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1617 		htp->hash_cond.cond_magic = COND_MAGIC;
1618 	}
1619 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1620 
1621 	/*
1622 	 * Set up the SIGCANCEL handler for threads cancellation.
1623 	 */
1624 	setup_cancelsig(SIGCANCEL);
1625 
1626 	/*
1627 	 * Arrange to do special things on exit --
1628 	 * - collect queue statistics from all remaining active threads.
1629 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1630 	 * - grab assert_lock to ensure that assertion failures
1631 	 *   and a core dump take precedence over _exit().
1632 	 * (Functions are called in the reverse order of their registration.)
1633 	 */
1634 	(void) _atexit(grab_assert_lock);
1635 #if defined(THREAD_DEBUG)
1636 	(void) _atexit(dump_queue_statistics);
1637 	(void) _atexit(collect_queue_statistics);
1638 #endif
1639 }
1640 
1641 /*
1642  * Used only by postfork1_child(), below.
1643  */
1644 static void
1645 mark_dead_and_buried(ulwp_t *ulwp)
1646 {
1647 	ulwp->ul_dead = 1;
1648 	ulwp->ul_lwpid = (lwpid_t)(-1);
1649 	ulwp->ul_hash = NULL;
1650 	ulwp->ul_ix = -1;
1651 	ulwp->ul_schedctl = NULL;
1652 	ulwp->ul_schedctl_called = NULL;
1653 }
1654 
/*
 * This is called from fork1() in the child.
 * Reset our data structures to reflect one lwp.
 *
 * In order, this:
 *   - resets the thread counts and the multi-threaded flags,
 *   - empties the thread hash table and re-enters the calling thread
 *     under its (new) lwpid,
 *   - reinitializes callout_lock, ld_lock and the sleep queues,
 *   - runs the post-fork1 hooks of the sigev/aio/tpool subsystems,
 *   - marks every other ulwp dead and frees the active ones,
 *   - disposes of the zombie list, and
 *   - trims the stack cache.
 */
void
postfork1_child()
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_head_t *qp;
	ulwp_t *next;
	ulwp_t *ulwp;
	int i;

	/* daemon threads shouldn't call fork1(), but oh well... */
	self->ul_usropts &= ~THR_DAEMON;
	udp->nthreads = 1;
	udp->ndaemons = 0;
	udp->uberflags.uf_mt = 0;
	__libc_threaded = 0;
	/* empty every hash bucket, then re-enter ourself under our new lwpid */
	for (i = 0; i < udp->hash_size; i++)
		udp->thr_hash_table[i].hash_bucket = NULL;
	self->ul_lwpid = _lwp_self();
	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);

	/*
	 * Some thread in the parent might have been suspended
	 * while holding udp->callout_lock or udp->ld_lock.
	 * Reinitialize the child's copies.
	 */
	(void) mutex_init(&udp->callout_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
	(void) mutex_init(&udp->ld_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);

	/* no one in the child is on a sleep queue; reinitialize */
	if ((qp = udp->queue_head) != NULL) {
		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
		/* the first QHASHSIZE entries are typed MX, the rest CV */
		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
			qp->qh_type = (i < QHASHSIZE)? MX : CV;
			qp->qh_lock.mutex_flag = LOCK_INITED;
			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
			qp->qh_hlist = &qp->qh_def_root;
#if defined(THREAD_DEBUG)
			qp->qh_hlen = 1;
			qp->qh_hmax = 1;
#endif
		}
	}

	/*
	 * Do post-fork1 processing for subsystems that need it.
	 * We need to do this before unmapping all of the abandoned
	 * threads' stacks, below, because the post-fork1 actions
	 * might require access to those stacks.
	 */
	postfork1_child_sigev_aio();
	postfork1_child_sigev_mq();
	postfork1_child_sigev_timer();
	postfork1_child_aio();
	/*
	 * The above subsystems use thread pools, so this action
	 * must be performed after those actions.
	 */
	postfork1_child_tpool();

	/*
	 * All lwps except ourself are gone.  Mark them so.
	 * First mark all of the lwps that have already been freed.
	 * Then mark and free all of the active lwps except ourself.
	 * Since we are single-threaded, no locks are required here.
	 */
	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	/*
	 * Walk the circular list of lwps starting after ourself.
	 * The successor is fetched before the links are cleared.
	 */
	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
		next = ulwp->ul_forw;
		ulwp->ul_forw = ulwp->ul_back = NULL;
		mark_dead_and_buried(ulwp);
		tsd_free(ulwp);
		tls_free(ulwp);
		rwl_free(ulwp);
		heldlock_free(ulwp);
		ulwp_free(ulwp);
	}
	/* we are now the sole member of the all_lwps list */
	self->ul_forw = self->ul_back = udp->all_lwps = self;
	if (self != udp->ulwp_one)
		mark_dead_and_buried(udp->ulwp_one);
	if ((ulwp = udp->all_zombies) != NULL) {
		ASSERT(udp->nzombies != 0);
		/*
		 * Walk the circular zombie list once around.  Zombies
		 * flagged ul_replace are appended to the tail of the
		 * ulwp_replace_free list.
		 */
		do {
			next = ulwp->ul_forw;
			ulwp->ul_forw = ulwp->ul_back = NULL;
			mark_dead_and_buried(ulwp);
			udp->nzombies--;
			if (ulwp->ul_replace) {
				ulwp->ul_next = NULL;
				if (udp->ulwp_replace_free == NULL) {
					udp->ulwp_replace_free =
					    udp->ulwp_replace_last = ulwp;
				} else {
					udp->ulwp_replace_last->ul_next = ulwp;
					udp->ulwp_replace_last = ulwp;
				}
			}
		} while ((ulwp = next) != udp->all_zombies);
		ASSERT(udp->nzombies == 0);
		udp->all_zombies = NULL;
		udp->nzombies = 0;
	}
	trim_stack_cache(0);
}
1768 
1769 lwpid_t
1770 lwp_self(void)
1771 {
1772 	return (curthread->ul_lwpid);
1773 }
1774 
1775 #pragma weak _ti_thr_self = thr_self
1776 #pragma weak pthread_self = thr_self
1777 thread_t
1778 thr_self()
1779 {
1780 	return (curthread->ul_lwpid);
1781 }
1782 
1783 int
1784 thr_main()
1785 {
1786 	ulwp_t *self = __curthread();
1787 
1788 	return ((self == NULL)? -1 : self->ul_main);
1789 }
1790 
1791 int
1792 _thrp_cancelled(void)
1793 {
1794 	return (curthread->ul_rval == PTHREAD_CANCELED);
1795 }
1796 
1797 int
1798 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1799 {
1800 	stk->ss_sp = (void *)ulwp->ul_stktop;
1801 	stk->ss_size = ulwp->ul_stksiz;
1802 	stk->ss_flags = 0;
1803 	return (0);
1804 }
1805 
1806 #pragma weak _thr_stksegment = thr_stksegment
1807 int
1808 thr_stksegment(stack_t *stk)
1809 {
1810 	return (_thrp_stksegment(curthread, stk));
1811 }
1812 
/*
 * Apply _lwp_continue() to the target thread, repeating for as long
 * as the target remains marked as being in the process of stopping
 * itself (ul_stopping).  The loop also ends if _lwp_continue() fails
 * with any error other than EINTR.
 *
 * The caller must hold fork_lock and the target's ulwp mutex
 * (both are asserted).  Nothing is returned; 'error' is purely local.
 */
void
force_continue(ulwp_t *ulwp)
{
#if defined(THREAD_DEBUG)
	/* these are referenced only by the assertions below */
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
#endif
	int error;
	timespec_t ts;

	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));

	for (;;) {
		error = _lwp_continue(ulwp->ul_lwpid);
		/* give up on a hard failure; EINTR is treated as success */
		if (error != 0 && error != EINTR)
			break;
		error = 0;
		if (ulwp->ul_stopping) {	/* it is stopping itself */
			ts.tv_sec = 0;		/* give it a chance to run */
			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
			(void) __nanosleep(&ts, NULL);
		}
		if (!ulwp->ul_stopping)		/* it is running now */
			break;			/* so we are done */
		/*
		 * It is marked as being in the process of stopping
		 * itself.  Loop around and continue it again.
		 * It may not have been stopped the first time.
		 */
	}
}
1845 
/*
 * Suspend an lwp with lwp_suspend(), then move it to a safe point,
 * that is, to a point where ul_critical and ul_rtld are both zero.
 * On return, the ulwp_lock() is dropped as with ulwp_unlock().
 * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
 * If we have to drop link_lock, we store 1 through link_dropped.
 * If the lwp exits before it can be suspended, we return ESRCH.
 *
 *	ulwp		the thread to suspend; must not be the caller and
 *			must not already be stopped (asserted)
 *	whystopped	exactly one of TSTP_REGULAR, TSTP_MUTATOR or
 *			TSTP_FORK (asserted)
 *	link_dropped	see above; set to 0 on entry when non-NULL
 *
 * The caller must hold fork_lock and the target's ulwp mutex
 * (both are asserted).  Returns 0 on success or ESRCH.
 */
int
safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	cond_t *cvp = ulwp_condvar(ulwp, udp);
	mutex_t *mp = ulwp_mutex(ulwp, udp);
	/*
	 * Remember the target's id and hash index now; they are used
	 * to find it again after waiting, when the ulwp pointer
	 * can no longer be trusted.
	 */
	thread_t tid = ulwp->ul_lwpid;
	int ix = ulwp->ul_ix;
	int error = 0;

	ASSERT(whystopped == TSTP_REGULAR ||
	    whystopped == TSTP_MUTATOR ||
	    whystopped == TSTP_FORK);
	ASSERT(ulwp != self);
	ASSERT(!ulwp->ul_stop);
	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(mp, self));

	if (link_dropped != NULL)
		*link_dropped = 0;

	/*
	 * We must grab the target's spin lock before suspending it.
	 * See the comments below and in _thrp_suspend() for why.
	 */
	spin_lock_set(&ulwp->ul_spinlock);
	(void) ___lwp_suspend(tid);
	spin_lock_clear(&ulwp->ul_spinlock);

top:
	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
	    ulwp->ul_stopping) {
		/* thread is already safe */
		ulwp->ul_stop |= whystopped;
	} else {
		/*
		 * Setting ul_pleasestop causes the target thread to stop
		 * itself in _thrp_suspend(), below, after we drop its lock.
		 * We must continue the critical thread before dropping
		 * link_lock because the critical thread may be holding
		 * the queue lock for link_lock.  This is delicate.
		 */
		ulwp->ul_pleasestop |= whystopped;
		force_continue(ulwp);
		if (link_dropped != NULL) {
			*link_dropped = 1;
			lmutex_unlock(&udp->link_lock);
			/* be sure to drop link_lock only once */
			link_dropped = NULL;
		}

		/*
		 * The thread may disappear by calling thr_exit() so we
		 * cannot rely on the ulwp pointer after dropping the lock.
		 * Instead, we search the hash table to find it again.
		 * When we return, we may find that the thread has been
		 * continued by some other thread.  The suspend/continue
		 * interfaces are prone to such race conditions by design.
		 */
		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
		    (ulwp->ul_pleasestop & whystopped)) {
			(void) __cond_wait(cvp, mp);
			/* look the target up again by its saved id */
			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
			    ulwp != NULL; ulwp = ulwp->ul_hash) {
				if (ulwp->ul_lwpid == tid)
					break;
			}
		}

		if (ulwp == NULL || ulwp->ul_dead)
			error = ESRCH;
		else {
			/*
			 * Do another lwp_suspend() to make sure we don't
			 * return until the target thread is fully stopped
			 * in the kernel.  Don't apply lwp_suspend() until
			 * we know that the target is not holding any
			 * queue locks, that is, that it has completed
			 * ulwp_unlock(self) and has, or at least is
			 * about to, call lwp_suspend() on itself.  We do
			 * this by grabbing the target's spin lock.
			 */
			ASSERT(ulwp->ul_lwpid == tid);
			spin_lock_set(&ulwp->ul_spinlock);
			(void) ___lwp_suspend(tid);
			spin_lock_clear(&ulwp->ul_spinlock);
			/*
			 * If some other thread did a thr_continue()
			 * on the target thread we have to start over.
			 */
			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
				goto top;
		}
	}

	/* wake anyone waiting on the target's condvar, then drop its lock */
	(void) cond_broadcast(cvp);
	lmutex_unlock(mp);
	return (error);
}
1954 
/*
 * Suspend the thread identified by 'tid' for the reason(s) in 'whystopped'
 * (some combination of TSTP_REGULAR, TSTP_MUTATOR and TSTP_FORK).
 *
 * Returns 0 on success, ESRCH if find_lwp() cannot find 'tid', EINVAL if
 * TSTP_MUTATOR is requested for a thread that is not marked as a mutator,
 * or whatever safe_suspend() returns when suspending another thread.
 *
 * Four cases are handled once the target has been found:
 *   - mutator suspension of a non-mutator: rejected with EINVAL;
 *   - target already stopped: the new reason is OR-ed into ul_stop;
 *   - target is another thread: delegated to safe_suspend();
 *   - target is the caller: the caller suspends itself via ___lwp_suspend().
 */
int
_thrp_suspend(thread_t tid, uchar_t whystopped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int error = 0;

	/* At least one known reason bit must be set, and no unknown bits. */
	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);

	/*
	 * We can't suspend anyone except ourself while
	 * some other thread is performing a fork.
	 * This also allows only one suspension at a time.
	 */
	if (tid != self->ul_lwpid)
		fork_lock_enter();

	/*
	 * Every branch below that gets a non-NULL ulwp is responsible for
	 * releasing the ulwp lock, either directly or via safe_suspend().
	 */
	if ((ulwp = find_lwp(tid)) == NULL)
		error = ESRCH;
	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
		ulwp_unlock(ulwp, udp);
		error = EINVAL;
	} else if (ulwp->ul_stop) {	/* already stopped */
		ulwp->ul_stop |= whystopped;
		ulwp_broadcast(ulwp);
		ulwp_unlock(ulwp, udp);
	} else if (ulwp != self) {
		/*
		 * After suspending the other thread, move it out of a
		 * critical section and deal with the schedctl mappings.
		 * safe_suspend() suspends the other thread, calls
		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
		 */
		error = safe_suspend(ulwp, whystopped, NULL);
	} else {
		/* Set when our schedctl pointer must be rebuilt on resume. */
		int schedctl_after_fork = 0;

		/*
		 * We are suspending ourself.  We must not take a signal
		 * until we return from lwp_suspend() and clear ul_stopping.
		 * This is to guard against siglongjmp().
		 */
		enter_critical(self);
		self->ul_sp = stkptr();
		_flush_windows();	/* sparc */
		self->ul_pleasestop = 0;
		self->ul_stop |= whystopped;
		/*
		 * Grab our spin lock before dropping ulwp_mutex(self).
		 * This prevents the suspending thread from applying
		 * lwp_suspend() to us before we emerge from
		 * lmutex_unlock(mp) and have dropped mp's queue lock.
		 */
		spin_lock_set(&self->ul_spinlock);
		self->ul_stopping = 1;
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		/*
		 * From this point until we return from lwp_suspend(),
		 * we must not call any function that might invoke the
		 * dynamic linker, that is, we can only call functions
		 * private to the library.
		 *
		 * Also, this is a nasty race condition for a process
		 * that is undergoing a forkall() operation:
		 * Once we clear our spinlock (below), we are vulnerable
		 * to being suspended by the forkall() thread before
		 * we manage to suspend ourself in ___lwp_suspend().
		 * See safe_suspend() and force_continue().
		 *
		 * To avoid a SIGSEGV due to the disappearance
		 * of the schedctl mappings in the child process,
		 * which can happen in spin_lock_clear() if we
		 * are suspended while we are in the middle of
		 * its call to preempt(), we preemptively clear
		 * our own schedctl pointer before dropping our
		 * spinlock.  We reinstate it, in both the parent
		 * and (if this really is a forkall()) the child.
		 */
		if (whystopped & TSTP_FORK) {
			schedctl_after_fork = 1;
			self->ul_schedctl = NULL;
			self->ul_schedctl_called = &udp->uberflags;
		}
		spin_lock_clear(&self->ul_spinlock);
		(void) ___lwp_suspend(tid);
		/*
		 * Somebody else continued us.
		 * We can't grab ulwp_lock(self)
		 * until after clearing ul_stopping.
		 * force_continue() relies on this.
		 */
		self->ul_stopping = 0;
		self->ul_sp = 0;
		if (schedctl_after_fork) {
			self->ul_schedctl_called = NULL;
			self->ul_schedctl = NULL;
			(void) setup_schedctl();
		}
		/* Notify anyone waiting on our condvar that we are running. */
		ulwp_lock(self, udp);
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		exit_critical(self);
	}

	/* Mirror of the conditional fork_lock_enter() above. */
	if (tid != self->ul_lwpid)
		fork_lock_exit();

	return (error);
}
2067 
2068 /*
2069  * Suspend all lwps other than ourself in preparation for fork.
2070  */
2071 void
2072 suspend_fork()
2073 {
2074 	ulwp_t *self = curthread;
2075 	uberdata_t *udp = self->ul_uberdata;
2076 	ulwp_t *ulwp;
2077 	int link_dropped;
2078 
2079 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2080 top:
2081 	lmutex_lock(&udp->link_lock);
2082 
2083 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2084 		ulwp_lock(ulwp, udp);
2085 		if (ulwp->ul_stop) {	/* already stopped */
2086 			ulwp->ul_stop |= TSTP_FORK;
2087 			ulwp_broadcast(ulwp);
2088 			ulwp_unlock(ulwp, udp);
2089 		} else {
2090 			/*
2091 			 * Move the stopped lwp out of a critical section.
2092 			 */
2093 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2094 			    link_dropped)
2095 				goto top;
2096 		}
2097 	}
2098 
2099 	lmutex_unlock(&udp->link_lock);
2100 }
2101 
2102 void
2103 continue_fork(int child)
2104 {
2105 	ulwp_t *self = curthread;
2106 	uberdata_t *udp = self->ul_uberdata;
2107 	ulwp_t *ulwp;
2108 
2109 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2110 
2111 	/*
2112 	 * Clear the schedctl pointers in the child of forkall().
2113 	 */
2114 	if (child) {
2115 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2116 			ulwp->ul_schedctl_called =
2117 			    ulwp->ul_dead? &udp->uberflags : NULL;
2118 			ulwp->ul_schedctl = NULL;
2119 		}
2120 	}
2121 
2122 	/*
2123 	 * Set all lwps that were stopped for fork() running again.
2124 	 */
2125 	lmutex_lock(&udp->link_lock);
2126 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2127 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2128 		lmutex_lock(mp);
2129 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2130 		ulwp->ul_stop &= ~TSTP_FORK;
2131 		ulwp_broadcast(ulwp);
2132 		if (!ulwp->ul_stop)
2133 			force_continue(ulwp);
2134 		lmutex_unlock(mp);
2135 	}
2136 	lmutex_unlock(&udp->link_lock);
2137 }
2138 
2139 int
2140 _thrp_continue(thread_t tid, uchar_t whystopped)
2141 {
2142 	uberdata_t *udp = curthread->ul_uberdata;
2143 	ulwp_t *ulwp;
2144 	mutex_t *mp;
2145 	int error = 0;
2146 
2147 	ASSERT(whystopped == TSTP_REGULAR ||
2148 	    whystopped == TSTP_MUTATOR);
2149 
2150 	/*
2151 	 * We single-thread the entire thread suspend/continue mechanism.
2152 	 */
2153 	fork_lock_enter();
2154 
2155 	if ((ulwp = find_lwp(tid)) == NULL) {
2156 		fork_lock_exit();
2157 		return (ESRCH);
2158 	}
2159 
2160 	mp = ulwp_mutex(ulwp, udp);
2161 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2162 		error = EINVAL;
2163 	} else if (ulwp->ul_stop & whystopped) {
2164 		ulwp->ul_stop &= ~whystopped;
2165 		ulwp_broadcast(ulwp);
2166 		if (!ulwp->ul_stop) {
2167 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2168 				ulwp->ul_sp = 0;
2169 				ulwp->ul_created = 0;
2170 			}
2171 			force_continue(ulwp);
2172 		}
2173 	}
2174 	lmutex_unlock(mp);
2175 
2176 	fork_lock_exit();
2177 	return (error);
2178 }
2179 
2180 int
2181 thr_suspend(thread_t tid)
2182 {
2183 	return (_thrp_suspend(tid, TSTP_REGULAR));
2184 }
2185 
2186 int
2187 thr_continue(thread_t tid)
2188 {
2189 	return (_thrp_continue(tid, TSTP_REGULAR));
2190 }
2191 
/*
 * Thin wrapper around yield().  Takes no arguments; the parameter list is
 * spelled (void) so the definition also serves as a prototype.
 */
void
thr_yield(void)
{
	yield();
}
2197 
2198 #pragma weak pthread_kill = thr_kill
2199 #pragma weak _thr_kill = thr_kill
2200 int
2201 thr_kill(thread_t tid, int sig)
2202 {
2203 	if (sig == SIGCANCEL)
2204 		return (EINVAL);
2205 	return (_lwp_kill(tid, sig));
2206 }
2207 
2208 /*
2209  * Exit a critical section, take deferred actions if necessary.
2210  * Called from exit_critical() and from sigon().
2211  */
2212 void
2213 do_exit_critical()
2214 {
2215 	ulwp_t *self = curthread;
2216 	int sig;
2217 
2218 	ASSERT(self->ul_critical == 0);
2219 
2220 	/*
2221 	 * Don't suspend ourself or take a deferred signal while dying
2222 	 * or while executing inside the dynamic linker (ld.so.1).
2223 	 */
2224 	if (self->ul_dead || self->ul_rtld)
2225 		return;
2226 
2227 	while (self->ul_pleasestop ||
2228 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2229 		/*
2230 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2231 		 * by keeping self->ul_critical == 1 here.
2232 		 */
2233 		self->ul_critical++;
2234 		while (self->ul_pleasestop) {
2235 			/*
2236 			 * Guard against suspending ourself while on a sleep
2237 			 * queue.  See the comments in call_user_handler().
2238 			 */
2239 			unsleep_self();
2240 			set_parking_flag(self, 0);
2241 			(void) _thrp_suspend(self->ul_lwpid,
2242 			    self->ul_pleasestop);
2243 		}
2244 		self->ul_critical--;
2245 
2246 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2247 			/*
2248 			 * Clear ul_cursig before proceeding.
2249 			 * This protects us from the dynamic linker's
2250 			 * calls to bind_guard()/bind_clear() in the
2251 			 * event that it is invoked to resolve a symbol
2252 			 * like take_deferred_signal() below.
2253 			 */
2254 			self->ul_cursig = 0;
2255 			take_deferred_signal(sig);
2256 			ASSERT(self->ul_cursig == 0);
2257 		}
2258 	}
2259 	ASSERT(self->ul_critical == 0);
2260 }
2261 
2262 /*
2263  * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
 * (ld.so.1) when it has to do something, like resolve a symbol to be called
2265  * by the application or one of its libraries.  _ti_bind_guard() is called
2266  * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2267  * application.  The dynamic linker gets special dispensation from libc to
2268  * run in a critical region (all signals deferred and no thread suspension
2269  * or forking allowed), and to be immune from cancellation for the duration.
2270  */
2271 int
2272 _ti_bind_guard(int flags)
2273 {
2274 	ulwp_t *self = curthread;
2275 	uberdata_t *udp = self->ul_uberdata;
2276 	int bindflag = (flags & THR_FLG_RTLD);
2277 
2278 	if ((self->ul_bindflags & bindflag) == bindflag)
2279 		return (0);
2280 	self->ul_bindflags |= bindflag;
2281 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2282 		sigoff(self);	/* see no signals while holding ld_lock */
2283 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2284 		(void) mutex_lock(&udp->ld_lock);
2285 	}
2286 	enter_critical(self);
2287 	self->ul_save_state = self->ul_cancel_disabled;
2288 	self->ul_cancel_disabled = 1;
2289 	set_cancel_pending_flag(self, 0);
2290 	return (1);
2291 }
2292 
2293 int
2294 _ti_bind_clear(int flags)
2295 {
2296 	ulwp_t *self = curthread;
2297 	uberdata_t *udp = self->ul_uberdata;
2298 	int bindflag = (flags & THR_FLG_RTLD);
2299 
2300 	if ((self->ul_bindflags & bindflag) == 0)
2301 		return (self->ul_bindflags);
2302 	self->ul_bindflags &= ~bindflag;
2303 	self->ul_cancel_disabled = self->ul_save_state;
2304 	set_cancel_pending_flag(self, 0);
2305 	exit_critical(self);
2306 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2307 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2308 			(void) mutex_unlock(&udp->ld_lock);
2309 			self->ul_rtld--;
2310 			sigon(self);	/* reenable signals */
2311 		}
2312 	}
2313 	return (self->ul_bindflags);
2314 }
2315 
2316 /*
2317  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2318  * a critical region in libc.  Return zero if not, else return non-zero.
2319  */
2320 int
2321 _ti_critical(void)
2322 {
2323 	ulwp_t *self = curthread;
2324 	int level = self->ul_critical;
2325 
2326 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2327 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2328 	return (level - 1);
2329 }
2330 
2331 /*
2332  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2333  * it does in the old libthread (see the comments in cond_wait_queue()).
2334  * Also, signals are deferred at thread startup until TLS constructors
2335  * have all been called, at which time _thrp_setup() calls sigon().
2336  *
2337  * _sigoff() and _sigon() are external consolidation-private interfaces to
2338  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2339  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2340  * (librtc.so) to defer signals during its critical sections (not to be
2341  * confused with libc critical sections [see exit_critical() above]).
2342  */
/*
 * External entry point that applies sigoff() to the calling thread.
 */
void
_sigoff(void)
{
	/*
	 * NOTE(review): sigoff() may be a macro that expands its argument
	 * textually; keep passing the local 'self' -- confirm before changing.
	 */
	ulwp_t *self = curthread;

	sigoff(self);
}
2350 
/*
 * External entry point that applies sigon() to the calling thread.
 * Must be paired with an earlier _sigoff(): ul_sigdefer is asserted to be
 * positive on entry.
 */
void
_sigon(void)
{
	/*
	 * NOTE(review): sigon() may be a macro that expands its argument
	 * textually; keep passing the local 'self' -- confirm before changing.
	 */
	ulwp_t *self = curthread;

	ASSERT(self->ul_sigdefer > 0);
	sigon(self);
}
2359 
2360 int
2361 thr_getconcurrency()
2362 {
2363 	return (thr_concurrency);
2364 }
2365 
2366 int
2367 pthread_getconcurrency()
2368 {
2369 	return (pthread_concurrency);
2370 }
2371 
2372 int
2373 thr_setconcurrency(int new_level)
2374 {
2375 	uberdata_t *udp = curthread->ul_uberdata;
2376 
2377 	if (new_level < 0)
2378 		return (EINVAL);
2379 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2380 		return (EAGAIN);
2381 	lmutex_lock(&udp->link_lock);
2382 	if (new_level > thr_concurrency)
2383 		thr_concurrency = new_level;
2384 	lmutex_unlock(&udp->link_lock);
2385 	return (0);
2386 }
2387 
2388 int
2389 pthread_setconcurrency(int new_level)
2390 {
2391 	if (new_level < 0)
2392 		return (EINVAL);
2393 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2394 		return (EAGAIN);
2395 	pthread_concurrency = new_level;
2396 	return (0);
2397 }
2398 
2399 size_t
2400 thr_min_stack(void)
2401 {
2402 	return (MINSTACK);
2403 }
2404 
2405 int
2406 __nthreads(void)
2407 {
2408 	return (curthread->ul_uberdata->nthreads);
2409 }
2410 
/*
 * Build "/proc/self/lwp/%u/lwpname" for 'tid' in 'buf' w/o stdio.
 *
 * The prefix and suffix are bounded by 'bufsize' (strlcpy/strlcat); the
 * decimal id in between is written by ultos() directly after the prefix,
 * which is not passed the buffer size, so callers must supply a buffer
 * comfortably larger than the finished path.
 */
static void
lwpname_path(pthread_t tid, char *buf, size_t bufsize)
{
	char *idpos;

	(void) strlcpy(buf, "/proc/self/lwp/", bufsize);

	/* Append the thread id in base 10 right after the prefix. */
	idpos = buf + strlen(buf);
	ultos((uint64_t)tid, 10, idpos);

	(void) strlcat(buf, "/lwpname", bufsize);
}
2419 
2420 #pragma weak pthread_setname_np = thr_setname
2421 int
2422 thr_setname(pthread_t tid, const char *name)
2423 {
2424 	extern ssize_t __write(int, const void *, size_t);
2425 	char path[PATH_MAX];
2426 	int saved_errno;
2427 	size_t len;
2428 	ssize_t n;
2429 	int fd;
2430 
2431 	if (name == NULL)
2432 		name = "";
2433 
2434 	len = strlen(name) + 1;
2435 	if (len > THREAD_NAME_MAX)
2436 		return (ERANGE);
2437 
2438 	lwpname_path(tid, path, sizeof (path));
2439 
2440 	if ((fd = __open(path, O_WRONLY, 0)) < 0) {
2441 		if (errno == ENOENT)
2442 			errno = ESRCH;
2443 		return (errno);
2444 	}
2445 
2446 	n = __write(fd, name, len);
2447 	saved_errno = errno;
2448 	(void) __close(fd);
2449 
2450 	if (n < 0)
2451 		return (saved_errno);
2452 	if (n != len)
2453 		return (EFAULT);
2454 	return (0);
2455 }
2456 
2457 #pragma weak pthread_getname_np = thr_getname
2458 int
2459 thr_getname(pthread_t tid, char *buf, size_t bufsize)
2460 {
2461 	extern ssize_t __read(int, void *, size_t);
2462 	char name[THREAD_NAME_MAX];
2463 	char path[PATH_MAX];
2464 	int saved_errno;
2465 	ssize_t n;
2466 	int fd;
2467 
2468 	if (buf == NULL)
2469 		return (EINVAL);
2470 
2471 	lwpname_path(tid, path, sizeof (path));
2472 
2473 	if ((fd = __open(path, O_RDONLY, 0)) < 0) {
2474 		if (errno == ENOENT)
2475 			errno = ESRCH;
2476 		return (errno);
2477 	}
2478 
2479 	n = __read(fd, name, sizeof (name));
2480 	saved_errno = errno;
2481 	(void) __close(fd);
2482 
2483 	if (n < 0)
2484 		return (saved_errno);
2485 	if (n != sizeof (name))
2486 		return (EFAULT);
2487 	if (strlcpy(buf, name, bufsize) >= bufsize)
2488 		return (ERANGE);
2489 	return (0);
2490 }
2491 
2492 /*
2493  * XXX
2494  * The remainder of this file implements the private interfaces to java for
2495  * garbage collection.  It is no longer used, at least by java 1.2.
2496  * It can all go away once all old JVMs have disappeared.
2497  */
2498 
/*
 * Global state shared by the mutator suspend/continue interfaces below.
 * mutatorsbarrier is read and written under mutatorslock, and threads
 * waiting for it to change sleep on mutatorscv.
 */
int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2504 
2505 /*
2506  * Get the available register state for the target thread.
2507  * Return non-volatile registers: TRS_NONVOLATILE
2508  */
2509 #pragma weak _thr_getstate = thr_getstate
2510 int
2511 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2512 {
2513 	ulwp_t *self = curthread;
2514 	uberdata_t *udp = self->ul_uberdata;
2515 	ulwp_t **ulwpp;
2516 	ulwp_t *ulwp;
2517 	int error = 0;
2518 	int trs_flag = TRS_LWPID;
2519 
2520 	if (tid == 0 || self->ul_lwpid == tid) {
2521 		ulwp = self;
2522 		ulwp_lock(ulwp, udp);
2523 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2524 		ulwp = *ulwpp;
2525 	} else {
2526 		if (flag)
2527 			*flag = TRS_INVALID;
2528 		return (ESRCH);
2529 	}
2530 
2531 	if (ulwp->ul_dead) {
2532 		trs_flag = TRS_INVALID;
2533 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2534 		error = EINVAL;
2535 		trs_flag = TRS_INVALID;
2536 	} else if (ulwp->ul_stop) {
2537 		trs_flag = TRS_NONVOLATILE;
2538 		getgregs(ulwp, rs);
2539 	}
2540 
2541 	if (flag)
2542 		*flag = trs_flag;
2543 	if (lwp)
2544 		*lwp = tid;
2545 	if (ss != NULL)
2546 		(void) _thrp_stksegment(ulwp, ss);
2547 
2548 	ulwp_unlock(ulwp, udp);
2549 	return (error);
2550 }
2551 
2552 /*
2553  * Set the appropriate register state for the target thread.
2554  * This is not used by java.  It exists solely for the MSTC test suite.
2555  */
2556 #pragma weak _thr_setstate = thr_setstate
2557 int
2558 thr_setstate(thread_t tid, int flag, gregset_t rs)
2559 {
2560 	uberdata_t *udp = curthread->ul_uberdata;
2561 	ulwp_t *ulwp;
2562 	int error = 0;
2563 
2564 	if ((ulwp = find_lwp(tid)) == NULL)
2565 		return (ESRCH);
2566 
2567 	if (!ulwp->ul_stop && !suspendedallmutators)
2568 		error = EINVAL;
2569 	else if (rs != NULL) {
2570 		switch (flag) {
2571 		case TRS_NONVOLATILE:
2572 			/* do /proc stuff here? */
2573 			if (ulwp->ul_stop)
2574 				setgregs(ulwp, rs);
2575 			else
2576 				error = EINVAL;
2577 			break;
2578 		case TRS_LWPID:		/* do /proc stuff here? */
2579 		default:
2580 			error = EINVAL;
2581 			break;
2582 		}
2583 	}
2584 
2585 	ulwp_unlock(ulwp, udp);
2586 	return (error);
2587 }
2588 
2589 int
2590 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2591 {
2592 	extern ssize_t __pread(int, void *, size_t, off_t);
2593 	char buf[100];
2594 	int fd;
2595 
2596 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2597 	(void) strcpy(buf, "/proc/self/lwp/");
2598 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2599 	(void) strcat(buf, "/lwpstatus");
2600 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2601 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2602 			if (sp->pr_flags & PR_STOPPED) {
2603 				(void) __close(fd);
2604 				return (0);
2605 			}
2606 			yield();	/* give it a chance to stop */
2607 		}
2608 		(void) __close(fd);
2609 	}
2610 	return (-1);
2611 }
2612 
2613 int
2614 putlwpregs(thread_t tid, prgregset_t prp)
2615 {
2616 	extern ssize_t __writev(int, const struct iovec *, int);
2617 	char buf[100];
2618 	int fd;
2619 	long dstop_sreg[2];
2620 	long run_null[2];
2621 	iovec_t iov[3];
2622 
2623 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2624 	(void) strcpy(buf, "/proc/self/lwp/");
2625 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2626 	(void) strcat(buf, "/lwpctl");
2627 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2628 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2629 		dstop_sreg[1] = PCSREG;		/* set the registers */
2630 		iov[0].iov_base = (caddr_t)dstop_sreg;
2631 		iov[0].iov_len = sizeof (dstop_sreg);
2632 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2633 		iov[1].iov_len = sizeof (prgregset_t);
2634 		run_null[0] = PCRUN;		/* make it runnable again */
2635 		run_null[1] = 0;
2636 		iov[2].iov_base = (caddr_t)run_null;
2637 		iov[2].iov_len = sizeof (run_null);
2638 		if (__writev(fd, iov, 3) >= 0) {
2639 			(void) __close(fd);
2640 			return (0);
2641 		}
2642 		(void) __close(fd);
2643 	}
2644 	return (-1);
2645 }
2646 
2647 static ulong_t
2648 gettsp_slow(thread_t tid)
2649 {
2650 	char buf[100];
2651 	struct lwpstatus status;
2652 
2653 	if (getlwpstatus(tid, &status) != 0) {
2654 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2655 		(void) strcpy(buf, "__gettsp(");
2656 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2657 		(void) strcat(buf, "): can't read lwpstatus");
2658 		thr_panic(buf);
2659 	}
2660 	return (status.pr_reg[R_SP]);
2661 }
2662 
2663 ulong_t
2664 __gettsp(thread_t tid)
2665 {
2666 	uberdata_t *udp = curthread->ul_uberdata;
2667 	ulwp_t *ulwp;
2668 	ulong_t result;
2669 
2670 	if ((ulwp = find_lwp(tid)) == NULL)
2671 		return (0);
2672 
2673 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2674 		ulwp_unlock(ulwp, udp);
2675 		return (result);
2676 	}
2677 
2678 	result = gettsp_slow(tid);
2679 	ulwp_unlock(ulwp, udp);
2680 	return (result);
2681 }
2682 
2683 /*
2684  * This tells java stack walkers how to find the ucontext
2685  * structure passed to signal handlers.
2686  */
2687 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2688 void
2689 thr_sighndlrinfo(void (**func)(), int *funcsize)
2690 {
2691 	*func = &__sighndlr;
2692 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2693 }
2694 
/*
 * Mark a thread a mutator or reset a mutator to being a default,
 * non-mutator thread.
 *
 * 'tid' of 0 selects the calling thread.  'enabled' is normalized to 0/1.
 *
 * Returns 0 on success (including when the flag already has the requested
 * value), ESRCH if the thread is not found, or EINVAL when trying to make
 * a running thread other than the caller a mutator.
 */
#pragma weak _thr_setmutator = thr_setmutator
int
thr_setmutator(thread_t tid, int enabled)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int error;
	int cancel_state;

	enabled = enabled? 1 : 0;
top:
	/* Both branches leave the target's ulwp lock held. */
	if (tid == 0) {
		ulwp = self;
		ulwp_lock(ulwp, udp);
	} else if ((ulwp = find_lwp(tid)) == NULL) {
		return (ESRCH);
	}

	/*
	 * The target thread should be the caller itself or a suspended thread.
	 * This prevents the target from also changing its ul_mutator field.
	 */
	error = 0;
	if (ulwp != self && !ulwp->ul_stop && enabled)
		error = EINVAL;
	else if (ulwp->ul_mutator != enabled) {
		lmutex_lock(&mutatorslock);
		if (mutatorsbarrier) {
			/*
			 * A barrier is in force: drop the ulwp lock, wait
			 * (with cancellation disabled) for the barrier to be
			 * lifted, then start over with a fresh lookup.
			 */
			ulwp_unlock(ulwp, udp);
			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
			    &cancel_state);
			while (mutatorsbarrier)
				(void) cond_wait(&mutatorscv, &mutatorslock);
			(void) pthread_setcancelstate(cancel_state, NULL);
			lmutex_unlock(&mutatorslock);
			goto top;
		}
		ulwp->ul_mutator = enabled;
		lmutex_unlock(&mutatorslock);
	}

	ulwp_unlock(ulwp, udp);
	return (error);
}
2744 
2745 /*
2746  * Establish a barrier against new mutators.  Any non-mutator trying
2747  * to become a mutator is suspended until the barrier is removed.
2748  */
2749 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2750 void
2751 thr_mutators_barrier(int enabled)
2752 {
2753 	int oldvalue;
2754 	int cancel_state;
2755 
2756 	lmutex_lock(&mutatorslock);
2757 
2758 	/*
2759 	 * Wait if trying to set the barrier while it is already set.
2760 	 */
2761 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2762 	while (mutatorsbarrier && enabled)
2763 		(void) cond_wait(&mutatorscv, &mutatorslock);
2764 	(void) pthread_setcancelstate(cancel_state, NULL);
2765 
2766 	oldvalue = mutatorsbarrier;
2767 	mutatorsbarrier = enabled;
2768 	/*
2769 	 * Wakeup any blocked non-mutators when barrier is removed.
2770 	 */
2771 	if (oldvalue && !enabled)
2772 		(void) cond_broadcast(&mutatorscv);
2773 	lmutex_unlock(&mutatorslock);
2774 }
2775 
/*
 * Suspend the set of all mutators except for the caller.  The list
 * of actively running threads is searched and only the mutators
 * in this list are suspended.  Actively running non-mutators remain
 * running.  Any other thread is suspended.
 *
 * Returns 0 on success, or EINVAL if a suspend-all is already in progress
 * or already in effect.
 */
#pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
int
thr_suspend_allmutators(void)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int link_dropped;

	/*
	 * We single-thread the entire thread suspend/continue mechanism.
	 */
	fork_lock_enter();

top:
	lmutex_lock(&udp->link_lock);

	if (suspendingallmutators || suspendedallmutators) {
		lmutex_unlock(&udp->link_lock);
		fork_lock_exit();
		return (EINVAL);
	}
	suspendingallmutators = 1;

	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
		ulwp_lock(ulwp, udp);
		if (!ulwp->ul_mutator) {
			/* Non-mutators are left alone. */
			ulwp_unlock(ulwp, udp);
		} else if (ulwp->ul_stop) {	/* already stopped */
			ulwp->ul_stop |= TSTP_MUTATOR;
			ulwp_broadcast(ulwp);
			ulwp_unlock(ulwp, udp);
		} else {
			/*
			 * Move the stopped lwp out of a critical section.
			 */
			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
			    link_dropped) {
				/*
				 * Clear the in-progress flag so the check at
				 * 'top' does not reject our own restart.
				 */
				suspendingallmutators = 0;
				goto top;
			}
		}
	}

	suspendedallmutators = 1;
	suspendingallmutators = 0;
	lmutex_unlock(&udp->link_lock);
	fork_lock_exit();
	return (0);
}
2832 
2833 /*
2834  * Suspend the target mutator.  The caller is permitted to suspend
2835  * itself.  If a mutator barrier is enabled, the caller will suspend
2836  * itself as though it had been suspended by thr_suspend_allmutators().
2837  * When the barrier is removed, this thread will be resumed.  Any
2838  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2839  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2840  */
2841 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2842 int
2843 thr_suspend_mutator(thread_t tid)
2844 {
2845 	if (tid == 0)
2846 		tid = curthread->ul_lwpid;
2847 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2848 }
2849 
2850 /*
2851  * Resume the set of all suspended mutators.
2852  */
2853 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2854 int
2855 thr_continue_allmutators()
2856 {
2857 	ulwp_t *self = curthread;
2858 	uberdata_t *udp = self->ul_uberdata;
2859 	ulwp_t *ulwp;
2860 
2861 	/*
2862 	 * We single-thread the entire thread suspend/continue mechanism.
2863 	 */
2864 	fork_lock_enter();
2865 
2866 	lmutex_lock(&udp->link_lock);
2867 	if (!suspendedallmutators) {
2868 		lmutex_unlock(&udp->link_lock);
2869 		fork_lock_exit();
2870 		return (EINVAL);
2871 	}
2872 	suspendedallmutators = 0;
2873 
2874 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2875 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2876 		lmutex_lock(mp);
2877 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2878 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2879 			ulwp_broadcast(ulwp);
2880 			if (!ulwp->ul_stop)
2881 				force_continue(ulwp);
2882 		}
2883 		lmutex_unlock(mp);
2884 	}
2885 
2886 	lmutex_unlock(&udp->link_lock);
2887 	fork_lock_exit();
2888 	return (0);
2889 }
2890 
2891 /*
2892  * Resume a suspended mutator.
2893  */
2894 #pragma weak _thr_continue_mutator = thr_continue_mutator
2895 int
2896 thr_continue_mutator(thread_t tid)
2897 {
2898 	return (_thrp_continue(tid, TSTP_MUTATOR));
2899 }
2900 
/*
 * Wait for mutator thread 'tid' to be stopped with TSTP_MUTATOR.
 *
 * If 'dontwait' is non-zero the call only tests the current state.
 * Cancellation is disabled for the duration and restored before returning.
 *
 * Returns 0 once the thread is stopped as a mutator, ESRCH if the thread
 * cannot be found, EINVAL if it is not a mutator, or EWOULDBLOCK if
 * 'dontwait' is set and the thread is not yet stopped.
 */
#pragma weak _thr_wait_mutator = thr_wait_mutator
int
thr_wait_mutator(thread_t tid, int dontwait)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;
	int cancel_state;
	int error = 0;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
top:
	if ((ulwp = find_lwp(tid)) == NULL) {
		(void) pthread_setcancelstate(cancel_state, NULL);
		return (ESRCH);
	}

	if (!ulwp->ul_mutator)
		error = EINVAL;
	else if (dontwait) {
		if (!(ulwp->ul_stop & TSTP_MUTATOR))
			error = EWOULDBLOCK;
	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
		cond_t *cvp = ulwp_condvar(ulwp, udp);
		mutex_t *mp = ulwp_mutex(ulwp, udp);

		/*
		 * Sleep on the target's condvar, then drop its mutex and
		 * look the thread up again from scratch before re-testing.
		 */
		(void) cond_wait(cvp, mp);
		(void) lmutex_unlock(mp);
		goto top;
	}

	ulwp_unlock(ulwp, udp);
	(void) pthread_setcancelstate(cancel_state, NULL);
	return (error);
}
2935 
2936 /* PROBE_SUPPORT begin */
2937 
2938 void
2939 thr_probe_setup(void *data)
2940 {
2941 	curthread->ul_tpdp = data;
2942 }
2943 
2944 static void *
2945 _thread_probe_getfunc()
2946 {
2947 	return (curthread->ul_tpdp);
2948 }
2949 
2950 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2951 
/*
 * Empty stub in the probe-support section.  All three arguments are
 * ignored and, per the note in the body, it is never called.
 */
/* ARGSUSED */
void
_resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
{
	/* never called */
}
2958 
/*
 * Empty stub in the probe-support section.  The argument is ignored and,
 * per the note in the body, it is never called.
 */
/* ARGSUSED */
void
_resume_ret(ulwp_t *oldlwp)
{
	/* never called */
}
2965 
2966 /* PROBE_SUPPORT end */
2967