xref: /titanic_52/usr/src/lib/libc/port/threads/thr.c (revision 41efec2219526a9b3ecce26f97aba761ef1e1d0d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <procfs.h>
32 #include <sys/uio.h>
33 #include <ctype.h>
34 
35 #undef errno
36 extern int errno;
37 
38 /*
39  * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
40  * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
41  * system used it illegally (it is a consolidation private symbol).
42  * To accommodate this and possibly other abusers of the symbol,
43  * we make it always equal to 1 now that libthread has been folded
44  * into libc.  The new __libc_threaded symbol is used to indicate
45  * the new meaning, "more than one thread exists".
46  */
47 int __threaded = 1;		/* always equal to 1 */
/*
 * __libc_threaded is set to 1 by _thrp_create(), while it holds link_lock,
 * after a new lwp has been created successfully.  Nothing in this part of
 * the file ever sets it back to 0.
 */
48 int __libc_threaded = 0;	/* zero until first thr_create() */
49 
50 /*
51  * thr_concurrency and pthread_concurrency are not used by the library.
52  * They exist solely to hold and return the values set by calls to
53  * thr_setconcurrency() and pthread_setconcurrency().
54  * Because thr_concurrency is affected by the THR_NEW_LWP flag
55  * to thr_create(), thr_concurrency is protected by link_lock.
56  */
/*
 * thr_concurrency is incremented in _thrp_create() for a THR_NEW_LWP thread
 * and decremented in _thrp_exit() when such a thread exits; both updates
 * are made with link_lock held.  pthread_concurrency has no explicit
 * initializer and therefore starts at zero.
 */
57 static	int	thr_concurrency = 1;
58 static	int	pthread_concurrency;
59 
#define	HASHTBLSZ	1024	/* must be a power of two */

/*
 * Map a thread id to its bucket index in the thread hash table by masking
 * it with the table's hash_mask (hash_size - 1).
 * Both arguments are parenthesized so that an arbitrary expression can be
 * passed for tid without being re-associated by operator precedence.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
62 
/*
 * One-entry thread hash table used until the real table is set up.
 * __uberdata.thr_hash_table points here initially, with hash_size 1 and
 * hash_mask 0, so every thread id hashes to bucket 0.  _thrp_create()
 * calls finish_init() while hash_size is still 1.
 * NOTE(review): finish_init() presumably installs the full-size table;
 * it is not defined in this part of the file -- confirm.
 */
63 /* initial allocation, just enough for one lwp */
64 #pragma align 64(init_hash_table)
65 thr_hash_table_t init_hash_table[1] = {
66 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
67 };
68 
69 extern const Lc_interface rtld_funcs[];
70 
/*
 * The single statically initialized uberdata_t instance.
 *
 * The initializer is positional: each value is matched to a member purely
 * by its position, and the trailing comments name the member each value is
 * meant for.  The thread hash table starts out as the one-entry
 * init_hash_table (hash_size 1, hash_mask 0).
 *
 * NOTE(review): the order here must match the declaration of uberdata_t,
 * which is not visible in this file (presumably in thr_uberdata.h).
 * Adding or reordering members there requires the same change here.
 */
71 /*
72  * The weak version is known to libc_db and mdb.
73  */
74 #pragma weak _uberdata = __uberdata
75 uberdata_t __uberdata = {
76 	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
77 	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
78 	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
79 	{ 0, },				/* tdb_hash_lock_stats */
80 	{ { 0 }, },			/* siguaction[NSIG] */
81 	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
82 	{ DEFAULTMUTEX, NULL, 0 },
83 	{ DEFAULTMUTEX, NULL, 0 },
84 	{ DEFAULTMUTEX, NULL, 0 },
85 	{ DEFAULTMUTEX, NULL, 0 },
86 	{ DEFAULTMUTEX, NULL, 0 },
87 	{ DEFAULTMUTEX, NULL, 0 },
88 	{ DEFAULTMUTEX, NULL, 0 },
89 	{ DEFAULTMUTEX, NULL, 0 },
90 	{ DEFAULTMUTEX, NULL, 0 }},
91 	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
92 	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
93 	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
94 	0,			/* primary_map */
95 	0,			/* bucket_init */
96 	0,			/* pad[0] */
97 	0,			/* pad[1] */
98 	{ 0 },			/* uberflags */
99 	NULL,			/* queue_head */
100 	init_hash_table,	/* thr_hash_table */
101 	1,			/* hash_size: size of the hash table */
102 	0,			/* hash_mask: hash_size - 1 */
103 	NULL,			/* ulwp_one */
104 	NULL,			/* all_lwps */
105 	NULL,			/* all_zombies */
106 	0,			/* nthreads */
107 	0,			/* nzombies */
108 	0,			/* ndaemons */
109 	0,			/* pid */
110 	sigacthandler,		/* sigacthandler */
111 	NULL,			/* lwp_stacks */
112 	NULL,			/* lwp_laststack */
113 	0,			/* nfreestack */
114 	10,			/* thread_stack_cache */
115 	NULL,			/* ulwp_freelist */
116 	NULL,			/* ulwp_lastfree */
117 	NULL,			/* ulwp_replace_free */
118 	NULL,			/* ulwp_replace_last */
119 	NULL,			/* atforklist */
120 	NULL,			/* __tdb_bootstrap */
121 	{			/* tdb */
122 		NULL,		/* tdb_sync_addr_hash */
123 		0,		/* tdb_register_count */
124 		0,		/* tdb_hash_alloc_failed */
125 		NULL,		/* tdb_sync_addr_free */
126 		NULL,		/* tdb_sync_addr_last */
127 		0,		/* tdb_sync_alloc */
128 		{ 0, 0 },	/* tdb_ev_global_mask */
129 		tdb_events,	/* tdb_events array */
130 	},
131 };
132 
133 /*
134  * The weak version is known to libc_db and mdb.
135  */
/*
 * Pointer to a pointer to the uberdata; NULL at load time.
 * NOTE(review): nothing in this part of the file assigns it; presumably it
 * is set during library initialization so that debuggers can locate
 * __uberdata -- confirm against the init code.
 */
136 #pragma weak _tdb_bootstrap = __tdb_bootstrap
137 uberdata_t **__tdb_bootstrap = NULL;
138 
/*
 * Global settings with their built-in default values.
 * thread_queue_dump is tested by collect_queue_statistics(); the others
 * are not read in this part of the file.
 * NOTE(review): presumably these are overridden at startup and copied into
 * the uberdata / per-thread ul_* fields (see the "per-thread copies of
 * global variables" in _thrp_create()) -- confirm against the init code.
 * Note that thread_stack_cache has the same default (10) as the
 * thread_stack_cache member in the __uberdata initializer.
 */
139 int	thread_queue_fifo = 4;
140 int	thread_queue_dump = 0;
141 int	thread_cond_wait_defer = 0;
142 int	thread_error_detection = 0;
143 int	thread_async_safe = 0;
144 int	thread_stack_cache = 10;
145 
146 int	thread_door_noreserve = 0;
147 
148 static	ulwp_t	*ulwp_alloc(void);
149 static	void	ulwp_free(ulwp_t *);
150 
151 /*
152  * Insert the lwp into the hash table.
153  */
154 void
155 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
156 {
157 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
158 	udp->thr_hash_table[ix].hash_bucket = ulwp;
159 	ulwp->ul_ix = ix;
160 }
161 
162 void
163 hash_in(ulwp_t *ulwp, uberdata_t *udp)
164 {
165 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
166 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
167 
168 	lmutex_lock(mp);
169 	hash_in_unlocked(ulwp, ix, udp);
170 	lmutex_unlock(mp);
171 }
172 
173 /*
174  * Delete the lwp from the hash table.
175  */
176 void
177 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
178 {
179 	ulwp_t **ulwpp;
180 
181 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
182 	    ulwp != *ulwpp;
183 	    ulwpp = &(*ulwpp)->ul_hash)
184 		;
185 	*ulwpp = ulwp->ul_hash;
186 	ulwp->ul_hash = NULL;
187 	ulwp->ul_ix = -1;
188 }
189 
190 void
191 hash_out(ulwp_t *ulwp, uberdata_t *udp)
192 {
193 	int ix;
194 
195 	if ((ix = ulwp->ul_ix) >= 0) {
196 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
197 
198 		lmutex_lock(mp);
199 		hash_out_unlocked(ulwp, ix, udp);
200 		lmutex_unlock(mp);
201 	}
202 }
203 
204 static void
205 ulwp_clean(ulwp_t *ulwp)
206 {
207 	ulwp->ul_self = NULL;
208 	ulwp->ul_rval = NULL;
209 	ulwp->ul_lwpid = 0;
210 	ulwp->ul_pri = 0;
211 	ulwp->ul_mappedpri = 0;
212 	ulwp->ul_policy = 0;
213 	ulwp->ul_pri_mapped = 0;
214 	ulwp->ul_mutator = 0;
215 	ulwp->ul_pleasestop = 0;
216 	ulwp->ul_stop = 0;
217 	ulwp->ul_dead = 0;
218 	ulwp->ul_unwind = 0;
219 	ulwp->ul_detached = 0;
220 	ulwp->ul_stopping = 0;
221 	ulwp->ul_sp = 0;
222 	ulwp->ul_critical = 0;
223 	ulwp->ul_cancelable = 0;
224 	ulwp->ul_preempt = 0;
225 	ulwp->ul_sigsuspend = 0;
226 	ulwp->ul_cancel_pending = 0;
227 	ulwp->ul_cancel_disabled = 0;
228 	ulwp->ul_cancel_async = 0;
229 	ulwp->ul_save_async = 0;
230 	ulwp->ul_cursig = 0;
231 	ulwp->ul_created = 0;
232 	ulwp->ul_replace = 0;
233 	ulwp->ul_schedctl_called = NULL;
234 	ulwp->ul_errno = 0;
235 	ulwp->ul_errnop = NULL;
236 	ulwp->ul_clnup_hdr = NULL;
237 	ulwp->ul_schedctl = NULL;
238 	ulwp->ul_bindflags = 0;
239 	(void) _private_memset(&ulwp->ul_td_evbuf, 0,
240 		sizeof (ulwp->ul_td_evbuf));
241 	ulwp->ul_td_events_enable = 0;
242 	ulwp->ul_qtype = 0;
243 	ulwp->ul_usropts = 0;
244 	ulwp->ul_startpc = NULL;
245 	ulwp->ul_startarg = NULL;
246 	ulwp->ul_wchan = NULL;
247 	ulwp->ul_link = NULL;
248 	ulwp->ul_sleepq = NULL;
249 	ulwp->ul_mxchain = NULL;
250 	ulwp->ul_epri = 0;
251 	ulwp->ul_emappedpri = 0;
252 	/* PROBE_SUPPORT begin */
253 	ulwp->ul_tpdp = NULL;
254 	/* PROBE_SUPPORT end */
255 	ulwp->ul_siglink = NULL;
256 	(void) _private_memset(ulwp->ul_ftsd, 0,
257 		sizeof (void *) * TSD_NFAST);
258 	ulwp->ul_stsd = NULL;
259 	(void) _private_memset(&ulwp->ul_spinlock, 0,
260 		sizeof (ulwp->ul_spinlock));
261 	ulwp->ul_spin_lock_spin = 0;
262 	ulwp->ul_spin_lock_spin2 = 0;
263 	ulwp->ul_spin_lock_sleep = 0;
264 	ulwp->ul_spin_lock_wakeup = 0;
265 	ulwp->ul_ex_unwind = NULL;
266 }
267 
/*
 * Protection bits passed to mmap() for new thread stacks.
 * Zero means "not yet determined"; find_stack() fills it in on first use
 * from _sysconf(_SC_STACK_PROT), falling back to
 * PROT_READ|PROT_WRITE|PROT_EXEC when that value is not positive.
 */
268 static int stackprot;
269 
270 /*
271  * Answer the question, "Is the lwp in question really dead?"
272  * We must inquire of the operating system to be really sure
273  * because the lwp may have called lwp_exit() but it has not
274  * yet completed the exit.
275  */
276 static int
277 dead_and_buried(ulwp_t *ulwp)
278 {
279 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
280 		return (1);
281 	if (ulwp->ul_dead && ulwp->ul_detached &&
282 	    __lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
283 		ulwp->ul_lwpid = (lwpid_t)(-1);
284 		return (1);
285 	}
286 	return (0);
287 }
288 
289 /*
290  * Attempt to keep the stack cache within the specified cache limit.
291  */
292 static void
293 trim_stack_cache(int cache_limit)
294 {
295 	ulwp_t *self = curthread;
296 	uberdata_t *udp = self->ul_uberdata;
297 	ulwp_t *prev = NULL;
298 	ulwp_t **ulwpp = &udp->lwp_stacks;
299 	ulwp_t *ulwp;
300 
301 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
302 
303 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
304 		if (dead_and_buried(ulwp)) {
305 			*ulwpp = ulwp->ul_next;
306 			if (ulwp == udp->lwp_laststack)
307 				udp->lwp_laststack = prev;
308 			hash_out(ulwp, udp);
309 			udp->nfreestack--;
310 			(void) _private_munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
311 			/*
312 			 * Now put the free ulwp on the ulwp freelist.
313 			 */
314 			ulwp->ul_mapsiz = 0;
315 			ulwp->ul_next = NULL;
316 			if (udp->ulwp_freelist == NULL)
317 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
318 			else {
319 				udp->ulwp_lastfree->ul_next = ulwp;
320 				udp->ulwp_lastfree = ulwp;
321 			}
322 		} else {
323 			prev = ulwp;
324 			ulwpp = &ulwp->ul_next;
325 		}
326 	}
327 }
328 
329 /*
330  * Find an unused stack of the requested size
331  * or create a new stack of the requested size.
332  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
333  * thr_exit() stores 1 in the ul_dead member.
334  * thr_join() stores -1 in the ul_lwpid member.
335  */
336 ulwp_t *
337 find_stack(size_t stksize, size_t guardsize)
338 {
339 	static size_t pagesize = 0;
340 
341 	uberdata_t *udp = curthread->ul_uberdata;
342 	size_t mapsize;
343 	ulwp_t *prev;
344 	ulwp_t *ulwp;
345 	ulwp_t **ulwpp;
346 	void *stk;
347 
348 	/*
349 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
350 	 * unless overridden by the system's configuration.
351 	 */
352 	if (stackprot == 0) {	/* do this once */
353 		long lprot = _sysconf(_SC_STACK_PROT);
354 		if (lprot <= 0)
355 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
356 		stackprot = (int)lprot;
357 	}
358 	if (pagesize == 0)	/* do this once */
359 		pagesize = _sysconf(_SC_PAGESIZE);
360 
361 	/*
362 	 * One megabyte stacks by default, but subtract off
363 	 * two pages for the system-created red zones.
364 	 * Round up a non-zero stack size to a pagesize multiple.
365 	 */
366 	if (stksize == 0)
367 		stksize = DEFAULTSTACK - 2 * pagesize;
368 	else
369 		stksize = ((stksize + pagesize - 1) & -pagesize);
370 
371 	/*
372 	 * Round up the mapping size to a multiple of pagesize.
373 	 * Note: mmap() provides at least one page of red zone
374 	 * so we deduct that from the value of guardsize.
375 	 */
376 	if (guardsize != 0)
377 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
378 	mapsize = stksize + guardsize;
379 
380 	lmutex_lock(&udp->link_lock);
381 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
382 	    (ulwp = *ulwpp) != NULL;
383 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
384 		if (ulwp->ul_mapsiz == mapsize &&
385 		    ulwp->ul_guardsize == guardsize &&
386 		    dead_and_buried(ulwp)) {
387 			/*
388 			 * The previous lwp is gone; reuse the stack.
389 			 * Remove the ulwp from the stack list.
390 			 */
391 			*ulwpp = ulwp->ul_next;
392 			ulwp->ul_next = NULL;
393 			if (ulwp == udp->lwp_laststack)
394 				udp->lwp_laststack = prev;
395 			hash_out(ulwp, udp);
396 			udp->nfreestack--;
397 			lmutex_unlock(&udp->link_lock);
398 			ulwp_clean(ulwp);
399 			return (ulwp);
400 		}
401 	}
402 
403 	/*
404 	 * None of the cached stacks matched our mapping size.
405 	 * Reduce the stack cache to get rid of possibly
406 	 * very old stacks that will never be reused.
407 	 */
408 	if (udp->nfreestack > udp->thread_stack_cache)
409 		trim_stack_cache(udp->thread_stack_cache);
410 	else if (udp->nfreestack > 0)
411 		trim_stack_cache(udp->nfreestack - 1);
412 	lmutex_unlock(&udp->link_lock);
413 
414 	/*
415 	 * Create a new stack.
416 	 */
417 	if ((stk = _private_mmap(NULL, mapsize, stackprot,
418 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
419 		/*
420 		 * We have allocated our stack.  Now allocate the ulwp.
421 		 */
422 		ulwp = ulwp_alloc();
423 		if (ulwp == NULL)
424 			(void) _private_munmap(stk, mapsize);
425 		else {
426 			ulwp->ul_stk = stk;
427 			ulwp->ul_mapsiz = mapsize;
428 			ulwp->ul_guardsize = guardsize;
429 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
430 			ulwp->ul_stksiz = stksize;
431 			ulwp->ul_ix = -1;
432 			if (guardsize)	/* protect the extra red zone */
433 				(void) _private_mprotect(stk,
434 					guardsize, PROT_NONE);
435 		}
436 	}
437 	return (ulwp);
438 }
439 
440 /*
441  * Get a ulwp_t structure from the free list or allocate a new one.
442  * Such ulwp_t's do not have a stack allocated by the library.
443  */
444 static ulwp_t *
445 ulwp_alloc(void)
446 {
447 	ulwp_t *self = curthread;
448 	uberdata_t *udp = self->ul_uberdata;
449 	size_t tls_size;
450 	ulwp_t *prev;
451 	ulwp_t *ulwp;
452 	ulwp_t **ulwpp;
453 	caddr_t data;
454 
455 	lmutex_lock(&udp->link_lock);
456 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
457 	    (ulwp = *ulwpp) != NULL;
458 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
459 		if (dead_and_buried(ulwp)) {
460 			*ulwpp = ulwp->ul_next;
461 			ulwp->ul_next = NULL;
462 			if (ulwp == udp->ulwp_lastfree)
463 				udp->ulwp_lastfree = prev;
464 			hash_out(ulwp, udp);
465 			lmutex_unlock(&udp->link_lock);
466 			ulwp_clean(ulwp);
467 			return (ulwp);
468 		}
469 	}
470 	lmutex_unlock(&udp->link_lock);
471 
472 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
473 	data = lmalloc(sizeof (*ulwp) + tls_size);
474 	if (data != NULL) {
475 		/* LINTED pointer cast may result in improper alignment */
476 		ulwp = (ulwp_t *)(data + tls_size);
477 	}
478 	return (ulwp);
479 }
480 
481 /*
482  * Free a ulwp structure.
483  * If there is an associated stack, put it on the stack list and
484  * munmap() previously freed stacks up to the residual cache limit.
485  * Else put it on the ulwp free list and never call lfree() on it.
486  */
487 static void
488 ulwp_free(ulwp_t *ulwp)
489 {
490 	uberdata_t *udp = curthread->ul_uberdata;
491 
492 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
493 	ulwp->ul_next = NULL;
494 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
495 		/*EMPTY*/;
496 	else if (ulwp->ul_mapsiz != 0) {
497 		if (udp->lwp_stacks == NULL)
498 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
499 		else {
500 			udp->lwp_laststack->ul_next = ulwp;
501 			udp->lwp_laststack = ulwp;
502 		}
503 		if (++udp->nfreestack > udp->thread_stack_cache)
504 			trim_stack_cache(udp->thread_stack_cache);
505 	} else {
506 		if (udp->ulwp_freelist == NULL)
507 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
508 		else {
509 			udp->ulwp_lastfree->ul_next = ulwp;
510 			udp->ulwp_lastfree = ulwp;
511 		}
512 	}
513 }
514 
515 /*
516  * Find a named lwp and return a pointer to its hash list location.
517  * On success, returns with the hash lock held.
518  */
519 ulwp_t **
520 find_lwpp(thread_t tid)
521 {
522 	uberdata_t *udp = curthread->ul_uberdata;
523 	int ix = TIDHASH(tid, udp);
524 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
525 	ulwp_t *ulwp;
526 	ulwp_t **ulwpp;
527 
528 	if (tid == 0)
529 		return (NULL);
530 
531 	lmutex_lock(mp);
532 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
533 	    (ulwp = *ulwpp) != NULL;
534 	    ulwpp = &ulwp->ul_hash) {
535 		if (ulwp->ul_lwpid == tid)
536 			return (ulwpp);
537 	}
538 	lmutex_unlock(mp);
539 	return (NULL);
540 }
541 
542 /*
543  * Wake up all lwps waiting on this lwp for some reason.
544  */
545 void
546 ulwp_broadcast(ulwp_t *ulwp)
547 {
548 	ulwp_t *self = curthread;
549 	uberdata_t *udp = self->ul_uberdata;
550 
551 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
552 	(void) cond_broadcast_internal(ulwp_condvar(ulwp, udp));
553 }
554 
555 /*
556  * Find a named lwp and return a pointer to it.
557  * Returns with the hash lock held.
558  */
559 ulwp_t *
560 find_lwp(thread_t tid)
561 {
562 	ulwp_t *self = curthread;
563 	uberdata_t *udp = self->ul_uberdata;
564 	ulwp_t *ulwp = NULL;
565 	ulwp_t **ulwpp;
566 
567 	if (self->ul_lwpid == tid) {
568 		ulwp = self;
569 		ulwp_lock(ulwp, udp);
570 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
571 		ulwp = *ulwpp;
572 	}
573 
574 	if (ulwp && ulwp->ul_dead) {
575 		ulwp_unlock(ulwp, udp);
576 		ulwp = NULL;
577 	}
578 
579 	return (ulwp);
580 }
581 
/*
 * Common thread creation code, called by _thr_create().
 *
 * stk, stksize:  caller-supplied stack and its size.  If stk is NULL the
 *		  library provides a stack via find_stack(stksize, guardsize).
 * func, arg:	  start routine and its argument.
 * flags:	  THR_* creation flags.  THR_DAEMON implies THR_DETACHED.
 * new_thread:	  if not NULL, receives the id of the new thread.
 * priority, policy: stored in the new ulwp's ul_pri and ul_policy.
 * guardsize:	  only used when the library allocates the stack.
 *
 * Returns 0 on success, otherwise:
 *	ENOTSUP	the primary link map is not initialized, or the caller
 *		is the child of a vfork()
 *	EINVAL	a stack or stack size was given and the size is below
 *		MINSTACK, or priority is out of range
 *	ENOMEM	no stack or ulwp_t could be allocated
 *	EAGAIN	setup_context() failed
 *	or the error returned by __lwp_create().
 *
 * The lwp is always created suspended (LWP_SUSPENDED); unless
 * THR_SUSPENDED was requested it is set running with _thrp_continue()
 * once it has been linked into all_lwps and the hash table.
 */
582 int
583 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
584 	long flags, thread_t *new_thread, pri_t priority, int policy,
585 	size_t guardsize)
586 {
587 	ulwp_t *self = curthread;
588 	uberdata_t *udp = self->ul_uberdata;
589 	ucontext_t uc;
590 	uint_t lwp_flags;
591 	thread_t tid;
592 	int error = 0;
593 	ulwp_t *ulwp;
594 
595 	/*
596 	 * Enforce the restriction of not creating any threads
597 	 * until the primary link map has been initialized.
598 	 * Also, disallow thread creation to a child of vfork().
599 	 */
600 	if (!self->ul_primarymap || self->ul_vfork)
601 		return (ENOTSUP);
602 
	/* hash_size is 1 only while the initial one-entry table is in use */
603 	if (udp->hash_size == 1)
604 		finish_init();
605 
606 	if (((stk || stksize) && stksize < MINSTACK) ||
607 	    priority < THREAD_MIN_PRIORITY || priority > THREAD_MAX_PRIORITY)
608 		return (EINVAL);
609 
610 	if (stk == NULL) {
611 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
612 			return (ENOMEM);
		/* usable stack size: the mapping minus the extra guard area */
613 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
614 	} else {
615 		/* initialize the private stack */
616 		if ((ulwp = ulwp_alloc()) == NULL)
617 			return (ENOMEM);
		/*
		 * NOTE(review): ul_guardsize is not assigned on this path,
		 * yet it is added to the stack base in the setup_context()
		 * call below.  This relies on ulwp_alloc() returning a ulwp
		 * whose ul_guardsize is 0; ulwp_clean() does not reset that
		 * field -- confirm.
		 */
618 		ulwp->ul_stk = stk;
619 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
620 		ulwp->ul_stksiz = stksize;
621 		ulwp->ul_ix = -1;
622 	}
623 	ulwp->ul_errnop = &ulwp->ul_errno;
624 
625 	lwp_flags = LWP_SUSPENDED;
626 	if (flags & (THR_DETACHED|THR_DAEMON)) {
627 		flags |= THR_DETACHED;
628 		lwp_flags |= LWP_DETACHED;
629 	}
630 	if (flags & THR_DAEMON)
631 		lwp_flags |= LWP_DAEMON;
632 
633 	/* creating a thread: enforce mt-correctness in _mutex_lock() */
634 	self->ul_async_safe = 1;
635 
636 	/* per-thread copies of global variables, for speed */
637 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
638 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
639 	ulwp->ul_error_detection = self->ul_error_detection;
640 	ulwp->ul_async_safe = self->ul_async_safe;
641 	ulwp->ul_max_spinners = self->ul_max_spinners;
642 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
643 	ulwp->ul_release_spin = self->ul_release_spin;
644 	ulwp->ul_queue_spin = self->ul_queue_spin;
645 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
646 
647 	ulwp->ul_primarymap = self->ul_primarymap;
648 	ulwp->ul_self = ulwp;
649 	ulwp->ul_uberdata = udp;
650 
651 	/* debugger support */
652 	ulwp->ul_usropts = flags;
653 
654 #ifdef __sparc
655 	/*
656 	 * We cache several instructions in the thread structure for use
657 	 * by the fasttrap DTrace provider. When changing this, read the
658 	 * comment in fasttrap.h for the all the other places that must
659 	 * be changed.
660 	 */
661 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
662 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
663 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
664 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
665 #endif
666 
667 	ulwp->ul_startpc = func;
668 	ulwp->ul_startarg = arg;
669 	_fpinherit(ulwp);
670 	/*
671 	 * Defer signals on the new thread until its TLS constructors
672 	 * have been called.  _thr_setup() will call sigon() after
673 	 * it has called tls_setup().
674 	 */
675 	ulwp->ul_sigdefer = 1;
676 
	/*
	 * A failure here is only recorded; it is acted upon below, together
	 * with a failure of __lwp_create(), so that both share one cleanup.
	 */
677 	if (setup_context(&uc, _thr_setup, ulwp,
678 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize) != 0)
679 		error = EAGAIN;
680 
681 	/*
682 	 * Call enter_critical() to avoid being suspended until we
683 	 * have linked the new thread into the proper lists.
684 	 * This is necessary because forkall() and fork1() must
685 	 * suspend all threads and they must see a complete list.
686 	 */
687 	enter_critical(self);
688 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
689 	if (error != 0 ||
690 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
		/*
		 * No lwp was created.  Mark the ulwp dead, detached and
		 * with lwpid -1 so that dead_and_buried() reports it as
		 * immediately reusable, then give it back.
		 */
691 		exit_critical(self);
692 		ulwp->ul_lwpid = (lwpid_t)(-1);
693 		ulwp->ul_dead = 1;
694 		ulwp->ul_detached = 1;
695 		lmutex_lock(&udp->link_lock);
696 		ulwp_free(ulwp);
697 		lmutex_unlock(&udp->link_lock);
698 		return (error);
699 	}
700 	self->ul_nocancel = 0;	/* cancellation is now possible */
701 	ulwp->ul_nocancel = 0;
702 	udp->uberflags.uf_mt = 1;
703 	if (new_thread)
704 		*new_thread = tid;
705 	if (flags & THR_DETACHED)
706 		ulwp->ul_detached = 1;
707 	ulwp->ul_lwpid = tid;
708 	ulwp->ul_stop = TSTP_REGULAR;
709 	if (flags & THR_SUSPENDED)
710 		ulwp->ul_created = 1;
711 	ulwp->ul_policy = policy;
712 	ulwp->ul_pri = priority;
713 
	/*
	 * Link the new ulwp into the circular all_lwps list just before
	 * the list head, and enter it into the hash table.
	 */
714 	lmutex_lock(&udp->link_lock);
715 	ulwp->ul_forw = udp->all_lwps;
716 	ulwp->ul_back = udp->all_lwps->ul_back;
717 	ulwp->ul_back->ul_forw = ulwp;
718 	ulwp->ul_forw->ul_back = ulwp;
719 	hash_in(ulwp, udp);
720 	udp->nthreads++;
721 	if (flags & THR_DAEMON)
722 		udp->ndaemons++;
723 	if (flags & THR_NEW_LWP)
724 		thr_concurrency++;
725 	__libc_threaded = 1;		/* inform stdio */
726 	lmutex_unlock(&udp->link_lock);
727 
728 	if (__td_event_report(self, TD_CREATE, udp)) {
729 		self->ul_td_evbuf.eventnum = TD_CREATE;
730 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
731 		tdb_event(TD_CREATE, udp);
732 	}
733 
734 	exit_critical(self);
735 
	/* the lwp was created suspended; let it run unless asked not to */
736 	if (!(flags & THR_SUSPENDED))
737 		(void) _thrp_continue(tid, TSTP_REGULAR);
738 
739 	return (0);
740 }
741 
742 #pragma weak thr_create = _thr_create
743 int
744 _thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
745 	long flags, thread_t *new_thread)
746 {
747 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread,
748 		curthread->ul_pri, curthread->ul_policy, 0));
749 }
750 
751 /*
752  * A special cancellation cleanup hook for DCE.
753  * cleanuphndlr, when it is not NULL, will contain a callback
754  * function to be called before a thread is terminated in
755  * _thr_exit() as a result of being cancelled.
756  */
/*
 * The hook is installed by _pthread_setcleanupinit() and invoked from
 * _thr_exit_common(), only when the exiting thread had a cancellation
 * pending and its exit status is PTHREAD_CANCELED.
 */
757 static void (*cleanuphndlr)(void) = NULL;
758 
759 /*
760  * _pthread_setcleanupinit: sets the cleanup hook.
761  */
762 int
763 _pthread_setcleanupinit(void (*func)(void))
764 {
765 	cleanuphndlr = func;
766 	return (0);
767 }
768 
/*
 * Final stage of thread exit for the calling thread; never returns.
 *
 * In order:
 *  - reports a TD_DEATH event if event reporting is enabled for it;
 *  - under link_lock, drops the thread from the nthreads, thr_concurrency
 *    and ndaemons accounting, and calls exit(0) if this was the last
 *    non-daemon thread;
 *  - releases thread-specific data and thread-local storage;
 *  - blocks all signals, enters a critical region and, under link_lock,
 *    hands the ulwp to ulwp_free(), removes it from the all_lwps list and
 *    marks it dead;
 *  - for a non-detached thread with a library-allocated stack, switches
 *    to a replacement ulwp_t that stays in the hash table for thr_join()
 *    while the original is marked detached so its stack can be reused;
 *  - puts a non-detached thread on the all_zombies list, wakes up any
 *    waiters and terminates the lwp.
 */
769 void
770 _thrp_exit()
771 {
772 	ulwp_t *self = curthread;
773 	uberdata_t *udp = self->ul_uberdata;
774 	ulwp_t *replace = NULL;
775 
776 	if (__td_event_report(self, TD_DEATH, udp)) {
777 		self->ul_td_evbuf.eventnum = TD_DEATH;
778 		tdb_event(TD_DEATH, udp);
779 	}
780 
781 	ASSERT(self->ul_sigdefer != 0);
782 
783 	lmutex_lock(&udp->link_lock);
784 	udp->nthreads--;
785 	if (self->ul_usropts & THR_NEW_LWP)
786 		thr_concurrency--;
787 	if (self->ul_usropts & THR_DAEMON)
788 		udp->ndaemons--;
789 	else if (udp->nthreads == udp->ndaemons) {
790 		/*
791 		 * We are the last non-daemon thread exiting.
792 		 * Exit the process.  We retain our TSD and TLS so
793 		 * that atexit() application functions can use them.
794 		 */
795 		lmutex_unlock(&udp->link_lock);
796 		exit(0);
797 		thr_panic("_thrp_exit(): exit(0) returned");
798 	}
799 	lmutex_unlock(&udp->link_lock);
800 
801 	tsd_exit();	/* deallocate thread-specific data */
802 	tls_exit();	/* deallocate thread-local storage */
803 
804 	/* block all signals to finish exiting */
805 	block_all_signals(self);
806 	/* also prevent ourself from being suspended */
807 	enter_critical(self);
808 	rwl_free(self);
809 	lmutex_lock(&udp->link_lock);
	/*
	 * The ulwp goes onto the stack list or the free list now, while we
	 * are still running on it.  It is not handed out again until
	 * dead_and_buried() reports that this lwp is really gone.
	 */
810 	ulwp_free(self);
811 	(void) ulwp_lock(self, udp);
812 
813 	if (self->ul_mapsiz && !self->ul_detached) {
814 		/*
815 		 * We want to free the stack for reuse but must keep
816 		 * the ulwp_t struct for the benefit of thr_join().
817 		 * For this purpose we allocate a replacement ulwp_t.
818 		 */
819 		if ((replace = udp->ulwp_replace_free) == NULL)
820 			replace = lmalloc(REPLACEMENT_SIZE);
821 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
822 			udp->ulwp_replace_last = NULL;
823 	}
824 
	/*
	 * Unlink ourself from the circular all_lwps list.  If we are the
	 * head, advance the head first; if the head is still ourself after
	 * that, we were the only element and the list becomes empty.
	 */
825 	if (udp->all_lwps == self)
826 		udp->all_lwps = self->ul_forw;
827 	if (udp->all_lwps == self)
828 		udp->all_lwps = NULL;
829 	else {
830 		self->ul_forw->ul_back = self->ul_back;
831 		self->ul_back->ul_forw = self->ul_forw;
832 	}
833 	self->ul_forw = self->ul_back = NULL;
834 	/* collect queue lock statistics before marking ourself dead */
835 	record_spin_locks(self);
836 	self->ul_dead = 1;
837 	self->ul_pleasestop = 0;
	/*
	 * replace is NULL if no replacement was wanted or if lmalloc()
	 * failed; in either case we carry on with the original ulwp.
	 */
838 	if (replace != NULL) {
839 		int ix = self->ul_ix;		/* the hash index */
840 		(void) _private_memcpy(replace, self, REPLACEMENT_SIZE);
841 		replace->ul_self = replace;
842 		replace->ul_next = NULL;	/* clone not on stack list */
843 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
844 		replace->ul_replace = 1;	/* requires clone to be freed */
845 		hash_out_unlocked(self, ix, udp);
846 		hash_in_unlocked(replace, ix, udp);
847 		ASSERT(!(self->ul_detached));
848 		self->ul_detached = 1;		/* this frees the stack */
849 		self->ul_schedctl = NULL;
850 		self->ul_schedctl_called = &udp->uberflags;
851 		set_curthread(self = replace);
852 		/*
853 		 * Having just changed the address of curthread, we
854 		 * must reset the ownership of the locks we hold so
855 		 * that assertions will not fire when we release them.
856 		 */
857 		udp->link_lock.mutex_owner = (uintptr_t)self;
858 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
859 		/*
860 		 * NOTE:
861 		 * On i386, %gs still references the original, not the
862 		 * replacement, ulwp structure.  Fetching the replacement
863 		 * curthread pointer via %gs:0 works correctly since the
864 		 * original ulwp structure will not be reallocated until
865 		 * this lwp has completed its lwp_exit() system call (see
866 		 * dead_and_buried()), but from here on out, we must make
867 		 * no references to %gs:<offset> other than %gs:0.
868 		 */
869 	}
870 	/*
871 	 * Put non-detached terminated threads in the all_zombies list.
872 	 */
873 	if (!self->ul_detached) {
874 		udp->nzombies++;
875 		if (udp->all_zombies == NULL) {
876 			ASSERT(udp->nzombies == 1);
877 			udp->all_zombies = self->ul_forw = self->ul_back = self;
878 		} else {
879 			self->ul_forw = udp->all_zombies;
880 			self->ul_back = udp->all_zombies->ul_back;
881 			self->ul_back->ul_forw = self;
882 			self->ul_forw->ul_back = self;
883 		}
884 	}
885 	/*
886 	 * Notify everyone waiting for this thread.
887 	 */
888 	ulwp_broadcast(self);
889 	(void) ulwp_unlock(self, udp);
890 	/*
891 	 * Prevent any more references to the schedctl data.
892 	 * We are exiting and continue_fork() may not find us.
893 	 * Do this just before dropping link_lock, since fork
894 	 * serializes on link_lock.
895 	 */
896 	self->ul_schedctl = NULL;
897 	self->ul_schedctl_called = &udp->uberflags;
898 	lmutex_unlock(&udp->link_lock);
899 
900 	ASSERT(self->ul_critical == 1);
901 	ASSERT(self->ul_preempt == 0);
902 	_lwp_terminate();	/* never returns */
903 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
904 }
905 
906 void
907 collect_queue_statistics()
908 {
909 	uberdata_t *udp = curthread->ul_uberdata;
910 	ulwp_t *ulwp;
911 
912 	if (thread_queue_dump) {
913 		lmutex_lock(&udp->link_lock);
914 		if ((ulwp = udp->all_lwps) != NULL) {
915 			do {
916 				record_spin_locks(ulwp);
917 			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
918 		}
919 		lmutex_unlock(&udp->link_lock);
920 	}
921 }
922 
923 void
924 _thr_exit_common(void *status, int unwind)
925 {
926 	ulwp_t *self = curthread;
927 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
928 
929 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
930 
931 	/*
932 	 * Disable cancellation and call the special DCE cancellation
933 	 * cleanup hook if it is enabled.  Do nothing else before calling
934 	 * the DCE cancellation cleanup hook; it may call longjmp() and
935 	 * never return here.
936 	 */
937 	self->ul_cancel_disabled = 1;
938 	self->ul_cancel_async = 0;
939 	self->ul_save_async = 0;
940 	self->ul_cancelable = 0;
941 	self->ul_cancel_pending = 0;
942 	if (cancelled && cleanuphndlr != NULL)
943 		(*cleanuphndlr)();
944 
945 	/*
946 	 * Block application signals while we are exiting.
947 	 * We call out to C++, TSD, and TLS destructors while exiting
948 	 * and these are application-defined, so we cannot be assured
949 	 * that they won't reset the signal mask.  We use sigoff() to
950 	 * defer any signals that may be received as a result of this
951 	 * bad behavior.  Such signals will be lost to the process
952 	 * when the thread finishes exiting.
953 	 */
954 	(void) _thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
955 	sigoff(self);
956 
957 	self->ul_rval = status;
958 
959 	/*
960 	 * If thr_exit is being called from the places where
961 	 * C++ destructors are to be called such as cancellation
962 	 * points, then set this flag. It is checked in _t_cancel()
963 	 * to decide whether _ex_unwind() is to be called or not.
964 	 */
965 	if (unwind)
966 		self->ul_unwind = 1;
967 
968 	/*
969 	 * _thrp_unwind() will eventually call _thrp_exit().
970 	 * It never returns.
971 	 */
972 	_thrp_unwind(NULL);
973 	thr_panic("_thr_exit_common(): _thrp_unwind() returned");
974 }
975 
/*
 * Called when a thread returns from its start function; status is the
 * value it returned.  We are at the top of the stack, so the exit is
 * performed with no unwinding requested.
 */
void
_thr_terminate(void *status)
{
	_thr_exit_common(status, 0);	/* unwind == 0 */
}
985 
#pragma weak thr_exit = _thr_exit
#pragma weak pthread_exit = _thr_exit
#pragma weak _pthread_exit = _thr_exit
/*
 * thr_exit() / pthread_exit(): terminate the calling thread with the
 * given exit status, requesting unwinding on the way out.
 */
void
_thr_exit(void *status)
{
	_thr_exit_common(status, 1);	/* unwind == 1 */
}
994 
/*
 * Common code for _thr_join() and _pthread_join().
 *
 * tid:		the thread to wait for, passed straight to lwp_wait().
 * departed:	if not NULL, receives the id of the lwp that was waited for.
 * status:	if not NULL, receives that thread's exit value, or NULL if
 *		the lwp is not known to the library.
 * do_cancel:	non-zero to wait with lwp_wait(); zero to wait with
 *		__lwp_wait(), retrying for as long as it returns EINTR.
 *
 * Returns 0 on success, otherwise the error from the wait call.
 *
 * After a successful wait the thread's ulwp is removed from the hash table
 * and the all_zombies list and its ul_lwpid is set to -1, which is what
 * dead_and_buried() tests for.  A replacement ulwp_t (ul_replace set) is
 * put on the ulwp_replace_free list.
 */
995 int
996 _thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
997 {
998 	uberdata_t *udp = curthread->ul_uberdata;
999 	mutex_t *mp;
1000 	void *rval;
1001 	thread_t found;
1002 	ulwp_t *ulwp;
1003 	ulwp_t **ulwpp;
1004 	int replace;
1005 	int error;
1006 
1007 	if (do_cancel)
1008 		error = lwp_wait(tid, &found);
1009 	else {
1010 		while ((error = __lwp_wait(tid, &found)) == EINTR)
1011 			;
1012 	}
1013 	if (error)
1014 		return (error);
1015 
1016 	/*
1017 	 * We must hold link_lock to avoid a race condition with find_stack().
1018 	 */
1019 	lmutex_lock(&udp->link_lock);
	/* on success find_lwpp() returns with the hash bucket lock held */
1020 	if ((ulwpp = find_lwpp(found)) == NULL) {
1021 		/*
1022 		 * lwp_wait() found an lwp that the library doesn't know
1023 		 * about.  It must have been created with _lwp_create().
1024 		 * Just return its lwpid; we can't know its status.
1025 		 */
1026 		lmutex_unlock(&udp->link_lock);
1027 		rval = NULL;
1028 	} else {
1029 		/*
1030 		 * Remove ulwp from the hash table.
1031 		 */
1032 		ulwp = *ulwpp;
1033 		*ulwpp = ulwp->ul_hash;
1034 		ulwp->ul_hash = NULL;
1035 		/*
1036 		 * Remove ulwp from all_zombies list.
1037 		 */
1038 		ASSERT(udp->nzombies >= 1);
1039 		if (udp->all_zombies == ulwp)
1040 			udp->all_zombies = ulwp->ul_forw;
1041 		if (udp->all_zombies == ulwp)
1042 			udp->all_zombies = NULL;
1043 		else {
1044 			ulwp->ul_forw->ul_back = ulwp->ul_back;
1045 			ulwp->ul_back->ul_forw = ulwp->ul_forw;
1046 		}
1047 		ulwp->ul_forw = ulwp->ul_back = NULL;
1048 		udp->nzombies--;
1049 		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
1050 			!(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
1051 		/*
1052 		 * We can't call ulwp_unlock(ulwp) after we set
1053 		 * ulwp->ul_ix = -1 so we have to get a pointer to the
1054 		 * ulwp's hash table mutex now in order to unlock it below.
1055 		 */
1056 		mp = ulwp_mutex(ulwp, udp);
1057 		ulwp->ul_lwpid = (lwpid_t)(-1);
1058 		ulwp->ul_ix = -1;
		/* copy out what we need before the bucket lock is dropped */
1059 		rval = ulwp->ul_rval;
1060 		replace = ulwp->ul_replace;
1061 		lmutex_unlock(mp);
		/* a replacement ulwp_t is recycled via ulwp_replace_free */
1062 		if (replace) {
1063 			ulwp->ul_next = NULL;
1064 			if (udp->ulwp_replace_free == NULL)
1065 				udp->ulwp_replace_free =
1066 					udp->ulwp_replace_last = ulwp;
1067 			else {
1068 				udp->ulwp_replace_last->ul_next = ulwp;
1069 				udp->ulwp_replace_last = ulwp;
1070 			}
1071 		}
1072 		lmutex_unlock(&udp->link_lock);
1073 	}
1074 
1075 	if (departed != NULL)
1076 		*departed = found;
1077 	if (status != NULL)
1078 		*status = rval;
1079 	return (0);
1080 }
1081 
1082 #pragma weak thr_join = _thr_join
1083 int
1084 _thr_join(thread_t tid, thread_t *departed, void **status)
1085 {
1086 	int error = _thrp_join(tid, departed, status, 1);
1087 	return ((error == EINVAL)? ESRCH : error);
1088 }
1089 
1090 /*
1091  * pthread_join() differs from Solaris thr_join():
1092  * It does not return the departed thread's id
1093  * and hence does not have a "departed" argument.
1094  * It returns EINVAL if tid refers to a detached thread.
1095  */
1096 #pragma weak pthread_join = _pthread_join
1097 int
1098 _pthread_join(pthread_t tid, void **status)
1099 {
1100 	return ((tid == 0)? ESRCH : _thrp_join(tid, NULL, status, 1));
1101 }
1102 
1103 #pragma weak pthread_detach = _thr_detach
1104 #pragma weak _pthread_detach = _thr_detach
1105 int
1106 _thr_detach(thread_t tid)
1107 {
1108 	uberdata_t *udp = curthread->ul_uberdata;
1109 	ulwp_t *ulwp;
1110 	ulwp_t **ulwpp;
1111 	int error = 0;
1112 
1113 	if ((ulwpp = find_lwpp(tid)) == NULL)
1114 		return (ESRCH);
1115 	ulwp = *ulwpp;
1116 
1117 	if (ulwp->ul_dead) {
1118 		ulwp_unlock(ulwp, udp);
1119 		error = _thrp_join(tid, NULL, NULL, 0);
1120 	} else {
1121 		error = __lwp_detach(tid);
1122 		ulwp->ul_detached = 1;
1123 		ulwp->ul_usropts |= THR_DETACHED;
1124 		ulwp_unlock(ulwp, udp);
1125 	}
1126 	return (error);
1127 }
1128 
/*
 * Private bounded string comparison, used in place of strncmp() so
 * that library initialization does not call out through the dynamic
 * linker.  Compares at most n characters; the result is zero when
 * they are equal, otherwise the difference of the first mismatching
 * characters taken as unsigned chars.
 */
static int
sncmp(const char *s1, const char *s2, size_t n)
{
	for (; n != 0; n--, s1++, s2++) {
		if (*s1 != *s2)
			return (*(const unsigned char *)s1 -
			    *(const unsigned char *)s2);
		if (*s1 == '\0')
			return (0);	/* both strings ended together */
	}
	return (0);
}
1142 
/*
 * Check whether the environment entry 'ev' has the form "<match>=...".
 * Returns a pointer to the first character after the '=' when it
 * does, NULL otherwise.
 */
static const char *
ematch(const char *ev, const char *match)
{
	for (; *match != '\0'; match++, ev++) {
		if (*ev != *match)
			return (NULL);
	}
	/* the name must be followed immediately by '=' */
	return ((*ev == '=') ? ev + 1 : NULL);
}
1156 
/*
 * Parse the environment entry 'ev' as "<match>=<decimal number>".
 *
 * Returns:
 *	-1	if 'ev' does not name 'match', or if the value contains
 *		any character that is not a decimal digit
 *	0	if the value is empty
 *	n	the parsed value, clamped to 'limit'
 *
 * The running value never exceeds 'limit' before it is multiplied by
 * ten, so no overflow occurs for limits up to (INT_MAX - 9) / 10.
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *valp;

	if ((valp = ematch(ev, match)) != NULL) {
		int c;

		/*
		 * Convert each byte through unsigned char: handing a
		 * negative char value to isdigit() is undefined behavior,
		 * and environment strings may contain high-bit bytes.
		 */
		for (val = 0; (c = (unsigned char)*valp) != '\0'; valp++) {
			if (!isdigit(c)) {
				val = -1;
				break;
			}
			val = val * 10 + (c - '0');
			if (val > limit) {
				val = limit;
				break;
			}
		}
	}
	return (val);
}
1179 
1180 static void
1181 etest(const char *ev)
1182 {
1183 	int value;
1184 
1185 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1186 		thread_queue_spin = value;
1187 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0) {
1188 		thread_adaptive_spin = value;
1189 		thread_release_spin = (value + 1) / 2;
1190 	}
1191 	if ((value = envvar(ev, "RELEASE_SPIN", 1000000)) >= 0)
1192 		thread_release_spin = value;
1193 	if ((value = envvar(ev, "MAX_SPINNERS", 100)) >= 0)
1194 		thread_max_spinners = value;
1195 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1196 		thread_queue_fifo = value;
1197 #if defined(THREAD_DEBUG)
1198 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1199 		thread_queue_verify = value;
1200 #endif
1201 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1202 		thread_queue_dump = value;
1203 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1204 		thread_stack_cache = value;
1205 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1206 		thread_cond_wait_defer = value;
1207 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1208 		thread_error_detection = value;
1209 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1210 		thread_async_safe = value;
1211 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1212 		thread_door_noreserve = value;
1213 }
1214 
/*
 * Scan the environment for variables named "_THREAD_*" and apply them.
 * Names of the form "LIBTHREAD_*" are honored as well, for
 * compatibility with the past.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* look at the first character before comparing the prefix */
		switch (*entry) {
		case '_':
			if (sncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (sncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1238 
1239 /* PROBE_SUPPORT begin */
1240 #pragma weak __tnf_probe_notify
1241 extern void __tnf_probe_notify(void);
1242 /* PROBE_SUPPORT end */
1243 
1244 /* same as atexit() but private to the library */
1245 extern int _atexit(void (*)(void));
1246 
1247 /* same as _cleanup() but private to the library */
1248 extern void __cleanup(void);
1249 
1250 extern void atfork_init(void);
1251 
1252 #ifdef __amd64
1253 extern void __amd64id(void);
1254 #endif
1255 
1256 /*
1257  * libc_init() is called by ld.so.1 for library initialization.
1258  * We perform minimal initialization; enough to work with the main thread.
1259  */
1260 void
1261 libc_init(void)
1262 {
1263 	uberdata_t *udp = &__uberdata;
1264 	ulwp_t *oldself = __curthread();
1265 	ucontext_t uc;
1266 	ulwp_t *self;
1267 	struct rlimit rl;
1268 	caddr_t data;
1269 	size_t tls_size;
1270 	int setmask;
1271 
1272 	/*
1273 	 * For the initial stage of initialization, we must be careful
1274 	 * not to call any function that could possibly call _cerror().
1275 	 * For this purpose, we call only the raw system call wrappers.
1276 	 */
1277 
1278 #ifdef __amd64
1279 	/*
1280 	 * Gather information about cache layouts for optimized
1281 	 * AMD assembler strfoo() and memfoo() functions.
1282 	 */
1283 	__amd64id();
1284 #endif
1285 
1286 	/*
1287 	 * Every libc, regardless of which link map, must register __cleanup().
1288 	 */
1289 	(void) _atexit(__cleanup);
1290 
1291 	/*
1292 	 * We keep our uberdata on one of (a) the first alternate link map
1293 	 * or (b) the primary link map.  We switch to the primary link map
1294 	 * and stay there once we see it.  All intermediate link maps are
1295 	 * subject to being unloaded at any time.
1296 	 */
1297 	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
1298 		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1299 		mutex_setup();
1300 		atfork_init();	/* every link map needs atfork() processing */
1301 		return;
1302 	}
1303 
1304 	/*
1305 	 * To establish the main stack information, we have to get our context.
1306 	 * This is also convenient to use for getting our signal mask.
1307 	 */
1308 	uc.uc_flags = UC_ALL;
1309 	(void) __getcontext_syscall(&uc);
1310 	ASSERT(uc.uc_link == NULL);
1311 
1312 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1313 	ASSERT(primary_link_map || tls_size == 0);
1314 	data = lmalloc(sizeof (ulwp_t) + tls_size);
1315 	if (data == NULL)
1316 		thr_panic("cannot allocate thread structure for main thread");
1317 	/* LINTED pointer cast may result in improper alignment */
1318 	self = (ulwp_t *)(data + tls_size);
1319 	init_hash_table[0].hash_bucket = self;
1320 
1321 	self->ul_sigmask = uc.uc_sigmask;
1322 	delete_reserved_signals(&self->ul_sigmask);
1323 	/*
1324 	 * Are the old and new sets different?
1325 	 * (This can happen if we are currently blocking SIGCANCEL.)
1326 	 * If so, we must explicitly set our signal mask, below.
1327 	 */
1328 	setmask =
1329 	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1330 	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]));
1331 
1332 #ifdef __sparc
1333 	/*
1334 	 * We cache several instructions in the thread structure for use
1335 	 * by the fasttrap DTrace provider. When changing this, read the
1336 	 * comment in fasttrap.h for the all the other places that must
1337 	 * be changed.
1338 	 */
1339 	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1340 	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1341 	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1342 	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1343 #endif
1344 
1345 	self->ul_stktop =
1346 		(uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1347 	(void) _private_getrlimit(RLIMIT_STACK, &rl);
1348 	self->ul_stksiz = rl.rlim_cur;
1349 	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1350 
1351 	self->ul_forw = self->ul_back = self;
1352 	self->ul_hash = NULL;
1353 	self->ul_ix = 0;
1354 	self->ul_lwpid = 1; /* __lwp_self() */
1355 	self->ul_main = 1;
1356 	self->ul_self = self;
1357 	self->ul_uberdata = udp;
1358 	if (oldself != NULL) {
1359 		int i;
1360 
1361 		ASSERT(primary_link_map);
1362 		ASSERT(oldself->ul_main == 1);
1363 		self->ul_stsd = oldself->ul_stsd;
1364 		for (i = 0; i < TSD_NFAST; i++)
1365 			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1366 		self->ul_tls = oldself->ul_tls;
1367 		/*
1368 		 * Retrieve all pointers to uberdata allocated
1369 		 * while running on previous link maps.
1370 		 * We would like to do a structure assignment here, but
1371 		 * gcc turns structure assignments into calls to memcpy(),
1372 		 * a function exported from libc.  We can't call any such
1373 		 * external functions until we establish curthread, below,
1374 		 * so we just call our private version of memcpy().
1375 		 */
1376 		(void) _private_memcpy(udp,
1377 		    oldself->ul_uberdata, sizeof (*udp));
1378 		/*
1379 		 * These items point to global data on the primary link map.
1380 		 */
1381 		udp->thr_hash_table = init_hash_table;
1382 		udp->sigacthandler = sigacthandler;
1383 		udp->tdb.tdb_events = tdb_events;
1384 		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1385 		ASSERT(udp->lwp_stacks == NULL);
1386 		ASSERT(udp->ulwp_freelist == NULL);
1387 		ASSERT(udp->ulwp_replace_free == NULL);
1388 		ASSERT(udp->hash_size == 1);
1389 	}
1390 	udp->all_lwps = self;
1391 	udp->ulwp_one = self;
1392 	udp->pid = _private_getpid();
1393 	udp->nthreads = 1;
1394 	/*
1395 	 * In every link map, tdb_bootstrap points to the same piece of
1396 	 * allocated memory.  When the primary link map is initialized,
1397 	 * the allocated memory is assigned a pointer to the one true
1398 	 * uberdata.  This allows libc_db to initialize itself regardless
1399 	 * of which instance of libc it finds in the address space.
1400 	 */
1401 	if (udp->tdb_bootstrap == NULL)
1402 		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1403 	__tdb_bootstrap = udp->tdb_bootstrap;
1404 	if (primary_link_map) {
1405 		self->ul_primarymap = 1;
1406 		udp->primary_map = 1;
1407 		*udp->tdb_bootstrap = udp;
1408 	}
1409 	/*
1410 	 * Cancellation can't happen until:
1411 	 *	pthread_cancel() is called
1412 	 * or:
1413 	 *	another thread is created
1414 	 * For now, as a single-threaded process, set the flag that tells
1415 	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1416 	 */
1417 	self->ul_nocancel = 1;
1418 
1419 #if defined(__amd64)
1420 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1421 #elif defined(__i386)
1422 	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1423 #endif	/* __i386 || __amd64 */
1424 	set_curthread(self);		/* redundant on i386 */
1425 	/*
1426 	 * Now curthread is established and it is safe to call any
1427 	 * function in libc except one that uses thread-local storage.
1428 	 */
1429 	self->ul_errnop = &errno;
1430 	if (oldself != NULL) {
1431 		/* tls_size was zero when oldself was allocated */
1432 		lfree(oldself, sizeof (ulwp_t));
1433 	}
1434 	mutex_setup();
1435 	atfork_init();
1436 	signal_init();
1437 
1438 	/*
1439 	 * If the stack is unlimited, we set the size to zero to disable
1440 	 * stack checking.
1441 	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1442 	 */
1443 	if (self->ul_stksiz == RLIM_INFINITY) {
1444 		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1445 		self->ul_ustack.ss_size = 0;
1446 	} else {
1447 		self->ul_ustack.ss_sp = self->ul_stk;
1448 		self->ul_ustack.ss_size = self->ul_stksiz;
1449 	}
1450 	self->ul_ustack.ss_flags = 0;
1451 	(void) _private_setustack(&self->ul_ustack);
1452 
1453 	/*
1454 	 * Get the variables that affect thread behavior from the environment.
1455 	 */
1456 	set_thread_vars();
1457 	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1458 	udp->thread_stack_cache = thread_stack_cache;
1459 
1460 	/*
1461 	 * Make per-thread copies of global variables, for speed.
1462 	 */
1463 	self->ul_queue_fifo = (char)thread_queue_fifo;
1464 	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1465 	self->ul_error_detection = (char)thread_error_detection;
1466 	self->ul_async_safe = (char)thread_async_safe;
1467 	self->ul_door_noreserve = (char)thread_door_noreserve;
1468 	self->ul_max_spinners = (uchar_t)thread_max_spinners;
1469 	self->ul_adaptive_spin = thread_adaptive_spin;
1470 	self->ul_release_spin = thread_release_spin;
1471 	self->ul_queue_spin = thread_queue_spin;
1472 
1473 	/*
1474 	 * When we have initialized the primary link map, inform
1475 	 * the dynamic linker about our interface functions.
1476 	 */
1477 	if (self->ul_primarymap)
1478 		_ld_libc((void *)rtld_funcs);
1479 
1480 	/*
1481 	 * Defer signals until TLS constructors have been called.
1482 	 */
1483 	sigoff(self);
1484 	tls_setup();
1485 	sigon(self);
1486 	if (setmask)
1487 		(void) restore_signals(self);
1488 
1489 	/* PROBE_SUPPORT begin */
1490 	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1491 		__tnf_probe_notify();
1492 	/* PROBE_SUPPORT end */
1493 
1494 	init_sigev_thread();
1495 	init_aio();
1496 
1497 	/*
1498 	 * We need to reset __threaded dynamically at runtime, so that
1499 	 * __threaded can be bound to __threaded outside libc which may not
1500 	 * have initial value of 1 (without a copy relocation in a.out).
1501 	 */
1502 	__threaded = 1;
1503 }
1504 
1505 #pragma fini(libc_fini)
1506 void
1507 libc_fini()
1508 {
1509 	/*
1510 	 * If we are doing fini processing for the instance of libc
1511 	 * on the first alternate link map (this happens only when
1512 	 * the dynamic linker rejects a bad audit library), then clear
1513 	 * __curthread().  We abandon whatever memory was allocated by
1514 	 * lmalloc() while running on this alternate link-map but we
1515 	 * don't care (and can't find the memory in any case); we just
1516 	 * want to protect the application from this bad audit library.
1517 	 * No fini processing is done by libc in the normal case.
1518 	 */
1519 
1520 	uberdata_t *udp = curthread->ul_uberdata;
1521 
1522 	if (udp->primary_map == 0 && udp == &__uberdata)
1523 		set_curthread(NULL);
1524 }
1525 
1526 /*
1527  * finish_init is called when we are about to become multi-threaded,
1528  * that is, on the first call to thr_create().
1529  */
1530 void
1531 finish_init()
1532 {
1533 	ulwp_t *self = curthread;
1534 	uberdata_t *udp = self->ul_uberdata;
1535 	thr_hash_table_t *htp;
1536 	void *data;
1537 	int i;
1538 
1539 	/*
1540 	 * No locks needed here; we are single-threaded on the first call.
1541 	 * We can be called only after the primary link map has been set up.
1542 	 */
1543 	ASSERT(self->ul_primarymap);
1544 	ASSERT(self == udp->ulwp_one);
1545 	ASSERT(!udp->uberflags.uf_mt);
1546 	ASSERT(udp->hash_size == 1);
1547 
1548 	/*
1549 	 * First allocate the queue_head array if not already allocated.
1550 	 */
1551 	if (udp->queue_head == NULL)
1552 		queue_alloc();
1553 
1554 	/*
1555 	 * Now allocate the thread hash table.
1556 	 */
1557 	if ((data = _private_mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1558 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1559 	    == MAP_FAILED)
1560 		thr_panic("cannot allocate thread hash table");
1561 
1562 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1563 	udp->hash_size = HASHTBLSZ;
1564 	udp->hash_mask = HASHTBLSZ - 1;
1565 
1566 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1567 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1568 		htp->hash_cond.cond_magic = COND_MAGIC;
1569 	}
1570 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1571 
1572 	/*
1573 	 * Set up the SIGCANCEL handler for threads cancellation.
1574 	 */
1575 	setup_cancelsig(SIGCANCEL);
1576 
1577 	/*
1578 	 * Arrange to do special things on exit --
1579 	 * - collect queue statistics from all remaining active threads.
1580 	 * - grab assert_lock to ensure that assertion failures
1581 	 *   and a core dump take precedence over _exit().
1582 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1583 	 * (Functions are called in the reverse order of their registration.)
1584 	 */
1585 	(void) _atexit(dump_queue_statistics);
1586 	(void) _atexit(grab_assert_lock);
1587 	(void) _atexit(collect_queue_statistics);
1588 }
1589 
1590 /*
1591  * Used only by postfork1_child(), below.
1592  */
1593 static void
1594 mark_dead_and_buried(ulwp_t *ulwp)
1595 {
1596 	ulwp->ul_dead = 1;
1597 	ulwp->ul_lwpid = (lwpid_t)(-1);
1598 	ulwp->ul_hash = NULL;
1599 	ulwp->ul_ix = -1;
1600 	ulwp->ul_schedctl = NULL;
1601 	ulwp->ul_schedctl_called = NULL;
1602 }
1603 
1604 /*
1605  * This is called from fork1() in the child.
1606  * Reset our data structures to reflect one lwp.
1607  */
1608 void
1609 postfork1_child()
1610 {
1611 	ulwp_t *self = curthread;
1612 	uberdata_t *udp = self->ul_uberdata;
1613 	ulwp_t *next;
1614 	ulwp_t *ulwp;
1615 	int i;
1616 
1617 	/* daemon threads shouldn't call fork1(), but oh well... */
1618 	self->ul_usropts &= ~THR_DAEMON;
1619 	udp->nthreads = 1;
1620 	udp->ndaemons = 0;
1621 	udp->uberflags.uf_mt = 0;
1622 	__libc_threaded = 0;
1623 	for (i = 0; i < udp->hash_size; i++)
1624 		udp->thr_hash_table[i].hash_bucket = NULL;
1625 	self->ul_lwpid = __lwp_self();
1626 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1627 
1628 	/* no one in the child is on a sleep queue; reinitialize */
1629 	if (udp->queue_head) {
1630 		(void) _private_memset(udp->queue_head, 0,
1631 			2 * QHASHSIZE * sizeof (queue_head_t));
1632 		for (i = 0; i < 2 * QHASHSIZE; i++)
1633 			udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC;
1634 	}
1635 
1636 	/*
1637 	 * All lwps except ourself are gone.  Mark them so.
1638 	 * First mark all of the lwps that have already been freed.
1639 	 * Then mark and free all of the active lwps except ourself.
1640 	 * Since we are single-threaded, no locks are required here.
1641 	 */
1642 	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1643 		mark_dead_and_buried(ulwp);
1644 	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1645 		mark_dead_and_buried(ulwp);
1646 	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1647 		next = ulwp->ul_forw;
1648 		ulwp->ul_forw = ulwp->ul_back = NULL;
1649 		mark_dead_and_buried(ulwp);
1650 		tsd_free(ulwp);
1651 		tls_free(ulwp);
1652 		rwl_free(ulwp);
1653 		ulwp_free(ulwp);
1654 	}
1655 	self->ul_forw = self->ul_back = udp->all_lwps = self;
1656 	if (self != udp->ulwp_one)
1657 		mark_dead_and_buried(udp->ulwp_one);
1658 	if ((ulwp = udp->all_zombies) != NULL) {
1659 		ASSERT(udp->nzombies != 0);
1660 		do {
1661 			next = ulwp->ul_forw;
1662 			ulwp->ul_forw = ulwp->ul_back = NULL;
1663 			mark_dead_and_buried(ulwp);
1664 			udp->nzombies--;
1665 			if (ulwp->ul_replace) {
1666 				ulwp->ul_next = NULL;
1667 				if (udp->ulwp_replace_free == NULL) {
1668 					udp->ulwp_replace_free =
1669 						udp->ulwp_replace_last = ulwp;
1670 				} else {
1671 					udp->ulwp_replace_last->ul_next = ulwp;
1672 					udp->ulwp_replace_last = ulwp;
1673 				}
1674 			}
1675 		} while ((ulwp = next) != udp->all_zombies);
1676 		ASSERT(udp->nzombies == 0);
1677 		udp->all_zombies = NULL;
1678 		udp->nzombies = 0;
1679 	}
1680 	trim_stack_cache(0);
1681 
1682 	/*
1683 	 * Do post-fork1 processing for subsystems that need it.
1684 	 */
1685 	postfork1_child_tpool();
1686 	postfork1_child_sigev_aio();
1687 	postfork1_child_sigev_mq();
1688 	postfork1_child_sigev_timer();
1689 	postfork1_child_aio();
1690 }
1691 
1692 #pragma weak thr_setprio = _thr_setprio
1693 #pragma weak pthread_setschedprio = _thr_setprio
1694 #pragma weak _pthread_setschedprio = _thr_setprio
1695 int
1696 _thr_setprio(thread_t tid, int priority)
1697 {
1698 	struct sched_param param;
1699 
1700 	(void) _memset(&param, 0, sizeof (param));
1701 	param.sched_priority = priority;
1702 	return (_thread_setschedparam_main(tid, 0, &param, PRIO_SET_PRIO));
1703 }
1704 
1705 #pragma weak thr_getprio = _thr_getprio
1706 int
1707 _thr_getprio(thread_t tid, int *priority)
1708 {
1709 	uberdata_t *udp = curthread->ul_uberdata;
1710 	ulwp_t *ulwp;
1711 	int error = 0;
1712 
1713 	if ((ulwp = find_lwp(tid)) == NULL)
1714 		error = ESRCH;
1715 	else {
1716 		*priority = ulwp->ul_pri;
1717 		ulwp_unlock(ulwp, udp);
1718 	}
1719 	return (error);
1720 }
1721 
1722 lwpid_t
1723 lwp_self(void)
1724 {
1725 	return (curthread->ul_lwpid);
1726 }
1727 
1728 #pragma weak _ti_thr_self = _thr_self
1729 #pragma weak thr_self = _thr_self
1730 #pragma weak pthread_self = _thr_self
1731 #pragma weak _pthread_self = _thr_self
1732 thread_t
1733 _thr_self()
1734 {
1735 	return (curthread->ul_lwpid);
1736 }
1737 
1738 #pragma weak thr_main = _thr_main
1739 int
1740 _thr_main()
1741 {
1742 	ulwp_t *self = __curthread();
1743 
1744 	return ((self == NULL)? -1 : self->ul_main);
1745 }
1746 
1747 int
1748 _thrp_cancelled(void)
1749 {
1750 	return (curthread->ul_rval == PTHREAD_CANCELED);
1751 }
1752 
1753 int
1754 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1755 {
1756 	stk->ss_sp = (void *)ulwp->ul_stktop;
1757 	stk->ss_size = ulwp->ul_stksiz;
1758 	stk->ss_flags = 0;
1759 	return (0);
1760 }
1761 
1762 #pragma weak thr_stksegment = _thr_stksegment
1763 int
1764 _thr_stksegment(stack_t *stk)
1765 {
1766 	return (_thrp_stksegment(curthread, stk));
1767 }
1768 
1769 void
1770 force_continue(ulwp_t *ulwp)
1771 {
1772 #if defined(THREAD_DEBUG)
1773 	ulwp_t *self = curthread;
1774 	uberdata_t *udp = self->ul_uberdata;
1775 #endif
1776 	int error;
1777 	timespec_t ts;
1778 
1779 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1780 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1781 
1782 	for (;;) {
1783 		error = __lwp_continue(ulwp->ul_lwpid);
1784 		if (error != 0 && error != EINTR)
1785 			break;
1786 		error = 0;
1787 		if (ulwp->ul_stopping) {	/* he is stopping himself */
1788 			ts.tv_sec = 0;		/* give him a chance to run */
1789 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1790 			(void) __nanosleep(&ts, NULL);
1791 		}
1792 		if (!ulwp->ul_stopping)		/* he is running now */
1793 			break;			/* so we are done */
1794 		/*
1795 		 * He is marked as being in the process of stopping
1796 		 * himself.  Loop around and continue him again.
1797 		 * He may not have been stopped the first time.
1798 		 */
1799 	}
1800 }
1801 
1802 /*
1803  * Suspend an lwp with lwp_suspend(), then move it to a safe
1804  * point, that is, to a point where ul_critical is zero.
1805  * On return, the ulwp_lock() is dropped as with ulwp_unlock().
1806  * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
1807  * If we have to drop link_lock, we store 1 through link_dropped.
1808  * If the lwp exits before it can be suspended, we return ESRCH.
1809  */
1810 int
1811 safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
1812 {
1813 	ulwp_t *self = curthread;
1814 	uberdata_t *udp = self->ul_uberdata;
1815 	cond_t *cvp = ulwp_condvar(ulwp, udp);
1816 	mutex_t *mp = ulwp_mutex(ulwp, udp);
1817 	thread_t tid = ulwp->ul_lwpid;
1818 	int ix = ulwp->ul_ix;
1819 	int error = 0;
1820 
1821 	ASSERT(whystopped == TSTP_REGULAR ||
1822 	    whystopped == TSTP_MUTATOR ||
1823 	    whystopped == TSTP_FORK);
1824 	ASSERT(ulwp != self);
1825 	ASSERT(!ulwp->ul_stop);
1826 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1827 	ASSERT(MUTEX_OWNED(mp, self));
1828 
1829 	if (link_dropped != NULL)
1830 		*link_dropped = 0;
1831 
1832 	/*
1833 	 * We must grab the target's spin lock before suspending it.
1834 	 * See the comments below and in _thrp_suspend() for why.
1835 	 */
1836 	spin_lock_set(&ulwp->ul_spinlock);
1837 	(void) ___lwp_suspend(tid);
1838 	spin_lock_clear(&ulwp->ul_spinlock);
1839 
1840 top:
1841 	if (ulwp->ul_critical == 0 || ulwp->ul_stopping) {
1842 		/* thread is already safe */
1843 		ulwp->ul_stop |= whystopped;
1844 	} else {
1845 		/*
1846 		 * Setting ul_pleasestop causes the target thread to stop
1847 		 * itself in _thrp_suspend(), below, after we drop its lock.
1848 		 * We must continue the critical thread before dropping
1849 		 * link_lock because the critical thread may be holding
1850 		 * the queue lock for link_lock.  This is delicate.
1851 		 */
1852 		ulwp->ul_pleasestop |= whystopped;
1853 		force_continue(ulwp);
1854 		if (link_dropped != NULL) {
1855 			*link_dropped = 1;
1856 			lmutex_unlock(&udp->link_lock);
1857 			/* be sure to drop link_lock only once */
1858 			link_dropped = NULL;
1859 		}
1860 
1861 		/*
1862 		 * The thread may disappear by calling thr_exit() so we
1863 		 * cannot rely on the ulwp pointer after dropping the lock.
1864 		 * Instead, we search the hash table to find it again.
1865 		 * When we return, we may find that the thread has been
1866 		 * continued by some other thread.  The suspend/continue
1867 		 * interfaces are prone to such race conditions by design.
1868 		 */
1869 		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
1870 		    (ulwp->ul_pleasestop & whystopped)) {
1871 			(void) _cond_wait(cvp, mp);
1872 			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
1873 			    ulwp != NULL; ulwp = ulwp->ul_hash) {
1874 				if (ulwp->ul_lwpid == tid)
1875 					break;
1876 			}
1877 		}
1878 
1879 		if (ulwp == NULL || ulwp->ul_dead)
1880 			error = ESRCH;
1881 		else {
1882 			/*
1883 			 * Do another lwp_suspend() to make sure we don't
1884 			 * return until the target thread is fully stopped
1885 			 * in the kernel.  Don't apply lwp_suspend() until
1886 			 * we know that the target is not holding any
1887 			 * queue locks, that is, that it has completed
1888 			 * ulwp_unlock(self) and has, or at least is
1889 			 * about to, call lwp_suspend() on itself.  We do
1890 			 * this by grabbing the target's spin lock.
1891 			 */
1892 			ASSERT(ulwp->ul_lwpid == tid);
1893 			spin_lock_set(&ulwp->ul_spinlock);
1894 			(void) ___lwp_suspend(tid);
1895 			spin_lock_clear(&ulwp->ul_spinlock);
1896 			/*
1897 			 * If some other thread did a thr_continue()
1898 			 * on the target thread we have to start over.
1899 			 */
1900 			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
1901 				goto top;
1902 		}
1903 	}
1904 
1905 	(void) cond_broadcast_internal(cvp);
1906 	lmutex_unlock(mp);
1907 	return (error);
1908 }
1909 
1910 int
1911 _thrp_suspend(thread_t tid, uchar_t whystopped)
1912 {
1913 	ulwp_t *self = curthread;
1914 	uberdata_t *udp = self->ul_uberdata;
1915 	ulwp_t *ulwp;
1916 	int error = 0;
1917 
1918 	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
1919 	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);
1920 
1921 	/*
1922 	 * We can't suspend anyone except ourself while
1923 	 * some other thread is performing a fork.
1924 	 * This also allows only one suspension at a time.
1925 	 */
1926 	if (tid != self->ul_lwpid)
1927 		(void) fork_lock_enter(NULL);
1928 
1929 	if ((ulwp = find_lwp(tid)) == NULL)
1930 		error = ESRCH;
1931 	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
1932 		ulwp_unlock(ulwp, udp);
1933 		error = EINVAL;
1934 	} else if (ulwp->ul_stop) {	/* already stopped */
1935 		ulwp->ul_stop |= whystopped;
1936 		ulwp_broadcast(ulwp);
1937 		ulwp_unlock(ulwp, udp);
1938 	} else if (ulwp != self) {
1939 		/*
1940 		 * After suspending the other thread, move it out of a
1941 		 * critical section and deal with the schedctl mappings.
1942 		 * safe_suspend() suspends the other thread, calls
1943 		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
1944 		 */
1945 		error = safe_suspend(ulwp, whystopped, NULL);
1946 	} else {
1947 		int schedctl_after_fork = 0;
1948 
1949 		/*
1950 		 * We are suspending ourself.  We must not take a signal
1951 		 * until we return from lwp_suspend() and clear ul_stopping.
1952 		 * This is to guard against siglongjmp().
1953 		 */
1954 		enter_critical(self);
1955 		self->ul_sp = stkptr();
1956 		_flush_windows();	/* sparc */
1957 		self->ul_pleasestop = 0;
1958 		self->ul_stop |= whystopped;
1959 		/*
1960 		 * Grab our spin lock before dropping ulwp_mutex(self).
1961 		 * This prevents the suspending thread from applying
1962 		 * lwp_suspend() to us before we emerge from
1963 		 * lmutex_unlock(mp) and have dropped mp's queue lock.
1964 		 */
1965 		spin_lock_set(&self->ul_spinlock);
1966 		self->ul_stopping = 1;
1967 		ulwp_broadcast(self);
1968 		ulwp_unlock(self, udp);
1969 		/*
1970 		 * From this point until we return from lwp_suspend(),
1971 		 * we must not call any function that might invoke the
1972 		 * dynamic linker, that is, we can only call functions
1973 		 * private to the library.
1974 		 *
1975 		 * Also, this is a nasty race condition for a process
1976 		 * that is undergoing a forkall() operation:
1977 		 * Once we clear our spinlock (below), we are vulnerable
1978 		 * to being suspended by the forkall() thread before
1979 		 * we manage to suspend ourself in ___lwp_suspend().
1980 		 * See safe_suspend() and force_continue().
1981 		 *
1982 		 * To avoid a SIGSEGV due to the disappearance
1983 		 * of the schedctl mappings in the child process,
1984 		 * which can happen in spin_lock_clear() if we
1985 		 * are suspended while we are in the middle of
1986 		 * its call to preempt(), we preemptively clear
1987 		 * our own schedctl pointer before dropping our
1988 		 * spinlock.  We reinstate it, in both the parent
1989 		 * and (if this really is a forkall()) the child.
1990 		 */
1991 		if (whystopped & TSTP_FORK) {
1992 			schedctl_after_fork = 1;
1993 			self->ul_schedctl = NULL;
1994 			self->ul_schedctl_called = &udp->uberflags;
1995 		}
1996 		spin_lock_clear(&self->ul_spinlock);
1997 		(void) ___lwp_suspend(tid);
1998 		/*
1999 		 * Somebody else continued us.
2000 		 * We can't grab ulwp_lock(self)
2001 		 * until after clearing ul_stopping.
2002 		 * force_continue() relies on this.
2003 		 */
2004 		self->ul_stopping = 0;
2005 		self->ul_sp = 0;
2006 		if (schedctl_after_fork) {
2007 			self->ul_schedctl_called = NULL;
2008 			self->ul_schedctl = NULL;
2009 			(void) setup_schedctl();
2010 		}
2011 		ulwp_lock(self, udp);
2012 		ulwp_broadcast(self);
2013 		ulwp_unlock(self, udp);
2014 		exit_critical(self);
2015 	}
2016 
2017 	if (tid != self->ul_lwpid)
2018 		fork_lock_exit();
2019 
2020 	return (error);
2021 }
2022 
2023 /*
2024  * Suspend all lwps other than ourself in preparation for fork.
2025  */
2026 void
2027 suspend_fork()
2028 {
2029 	ulwp_t *self = curthread;
2030 	uberdata_t *udp = self->ul_uberdata;
2031 	ulwp_t *ulwp;
2032 	int link_dropped;
2033 
2034 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2035 top:
2036 	lmutex_lock(&udp->link_lock);
2037 
2038 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2039 		ulwp_lock(ulwp, udp);
2040 		if (ulwp->ul_stop) {	/* already stopped */
2041 			ulwp->ul_stop |= TSTP_FORK;
2042 			ulwp_broadcast(ulwp);
2043 			ulwp_unlock(ulwp, udp);
2044 		} else {
2045 			/*
2046 			 * Move the stopped lwp out of a critical section.
2047 			 */
2048 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2049 			    link_dropped)
2050 				goto top;
2051 		}
2052 	}
2053 
2054 	lmutex_unlock(&udp->link_lock);
2055 }
2056 
2057 void
2058 continue_fork(int child)
2059 {
2060 	ulwp_t *self = curthread;
2061 	uberdata_t *udp = self->ul_uberdata;
2062 	ulwp_t *ulwp;
2063 
2064 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2065 
2066 	/*
2067 	 * Clear the schedctl pointers in the child of forkall().
2068 	 */
2069 	if (child) {
2070 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2071 			ulwp->ul_schedctl_called =
2072 				ulwp->ul_dead? &udp->uberflags : NULL;
2073 			ulwp->ul_schedctl = NULL;
2074 		}
2075 	}
2076 
2077 	/*
2078 	 * Set all lwps that were stopped for fork() running again.
2079 	 */
2080 	lmutex_lock(&udp->link_lock);
2081 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2082 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2083 		lmutex_lock(mp);
2084 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2085 		ulwp->ul_stop &= ~TSTP_FORK;
2086 		ulwp_broadcast(ulwp);
2087 		if (!ulwp->ul_stop)
2088 			force_continue(ulwp);
2089 		lmutex_unlock(mp);
2090 	}
2091 	lmutex_unlock(&udp->link_lock);
2092 }
2093 
2094 int
2095 _thrp_continue(thread_t tid, uchar_t whystopped)
2096 {
2097 	uberdata_t *udp = curthread->ul_uberdata;
2098 	ulwp_t *ulwp;
2099 	mutex_t *mp;
2100 	int error = 0;
2101 
2102 	ASSERT(whystopped == TSTP_REGULAR ||
2103 	    whystopped == TSTP_MUTATOR);
2104 
2105 	/*
2106 	 * We single-thread the entire thread suspend/continue mechanism.
2107 	 */
2108 	(void) fork_lock_enter(NULL);
2109 
2110 	if ((ulwp = find_lwp(tid)) == NULL) {
2111 		fork_lock_exit();
2112 		return (ESRCH);
2113 	}
2114 
2115 	mp = ulwp_mutex(ulwp, udp);
2116 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2117 		error = EINVAL;
2118 	} else if (ulwp->ul_stop & whystopped) {
2119 		ulwp->ul_stop &= ~whystopped;
2120 		ulwp_broadcast(ulwp);
2121 		if (!ulwp->ul_stop) {
2122 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2123 				ulwp->ul_sp = 0;
2124 				ulwp->ul_created = 0;
2125 			}
2126 			force_continue(ulwp);
2127 		}
2128 	}
2129 	lmutex_unlock(mp);
2130 
2131 	fork_lock_exit();
2132 	return (error);
2133 }
2134 
2135 #pragma weak thr_suspend = _thr_suspend
2136 int
2137 _thr_suspend(thread_t tid)
2138 {
2139 	return (_thrp_suspend(tid, TSTP_REGULAR));
2140 }
2141 
2142 #pragma weak thr_continue = _thr_continue
2143 int
2144 _thr_continue(thread_t tid)
2145 {
2146 	return (_thrp_continue(tid, TSTP_REGULAR));
2147 }
2148 
/*
 * thr_yield(): thin wrapper around lwp_yield().
 */
#pragma weak thr_yield = _thr_yield
void
_thr_yield()
{
	lwp_yield();
}
2155 
2156 #pragma weak thr_kill = _thr_kill
2157 #pragma weak pthread_kill = _thr_kill
2158 #pragma weak _pthread_kill = _thr_kill
2159 int
2160 _thr_kill(thread_t tid, int sig)
2161 {
2162 	if (sig == SIGCANCEL)
2163 		return (EINVAL);
2164 	return (__lwp_kill(tid, sig));
2165 }
2166 
2167 /*
2168  * Exit a critical section, take deferred actions if necessary.
2169  */
2170 void
2171 do_exit_critical()
2172 {
2173 	ulwp_t *self = curthread;
2174 	int sig;
2175 
2176 	ASSERT(self->ul_critical == 0);
2177 	if (self->ul_dead)
2178 		return;
2179 
2180 	while (self->ul_pleasestop ||
2181 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2182 		/*
2183 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2184 		 * by keeping self->ul_critical == 1 here.
2185 		 */
2186 		self->ul_critical++;
2187 		while (self->ul_pleasestop) {
2188 			/*
2189 			 * Guard against suspending ourself while on a sleep
2190 			 * queue.  See the comments in call_user_handler().
2191 			 */
2192 			unsleep_self();
2193 			set_parking_flag(self, 0);
2194 			(void) _thrp_suspend(self->ul_lwpid,
2195 				self->ul_pleasestop);
2196 		}
2197 		self->ul_critical--;
2198 
2199 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2200 			/*
2201 			 * Clear ul_cursig before proceeding.
2202 			 * This protects us from the dynamic linker's
2203 			 * calls to bind_guard()/bind_clear() in the
2204 			 * event that it is invoked to resolve a symbol
2205 			 * like take_deferred_signal() below.
2206 			 */
2207 			self->ul_cursig = 0;
2208 			take_deferred_signal(sig);
2209 			ASSERT(self->ul_cursig == 0);
2210 		}
2211 	}
2212 	ASSERT(self->ul_critical == 0);
2213 }
2214 
2215 int
2216 _ti_bind_guard(int bindflag)
2217 {
2218 	ulwp_t *self = curthread;
2219 
2220 	if ((self->ul_bindflags & bindflag) == bindflag)
2221 		return (0);
2222 	enter_critical(self);
2223 	self->ul_bindflags |= bindflag;
2224 	return (1);
2225 }
2226 
2227 int
2228 _ti_bind_clear(int bindflag)
2229 {
2230 	ulwp_t *self = curthread;
2231 
2232 	if ((self->ul_bindflags & bindflag) == 0)
2233 		return (self->ul_bindflags);
2234 	self->ul_bindflags &= ~bindflag;
2235 	exit_critical(self);
2236 	return (self->ul_bindflags);
2237 }
2238 
2239 /*
2240  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2241  * it does in the old libthread (see the comments in cond_wait_queue()).
2242  * Also, signals are deferred at thread startup until TLS constructors
2243  * have all been called, at which time _thr_setup() calls sigon().
2244  *
2245  * _sigoff() and _sigon() are external consolidation-private interfaces to
2246  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2247  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2248  * (librtc.so) to defer signals during its critical sections (not to be
2249  * confused with libc critical sections [see exit_critical() above]).
2250  */
2251 void
2252 _sigoff(void)
2253 {
2254 	sigoff(curthread);
2255 }
2256 
2257 void
2258 _sigon(void)
2259 {
2260 	sigon(curthread);
2261 }
2262 
2263 void
2264 sigon(ulwp_t *self)
2265 {
2266 	int sig;
2267 
2268 	ASSERT(self->ul_sigdefer > 0);
2269 	if (--self->ul_sigdefer == 0) {
2270 		if ((sig = self->ul_cursig) != 0 && self->ul_critical == 0) {
2271 			self->ul_cursig = 0;
2272 			take_deferred_signal(sig);
2273 			ASSERT(self->ul_cursig == 0);
2274 		}
2275 	}
2276 }
2277 
2278 #pragma weak thr_getconcurrency = _thr_getconcurrency
2279 int
2280 _thr_getconcurrency()
2281 {
2282 	return (thr_concurrency);
2283 }
2284 
2285 #pragma weak pthread_getconcurrency = _pthread_getconcurrency
2286 int
2287 _pthread_getconcurrency()
2288 {
2289 	return (pthread_concurrency);
2290 }
2291 
2292 #pragma weak thr_setconcurrency = _thr_setconcurrency
2293 int
2294 _thr_setconcurrency(int new_level)
2295 {
2296 	uberdata_t *udp = curthread->ul_uberdata;
2297 
2298 	if (new_level < 0)
2299 		return (EINVAL);
2300 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2301 		return (EAGAIN);
2302 	lmutex_lock(&udp->link_lock);
2303 	if (new_level > thr_concurrency)
2304 		thr_concurrency = new_level;
2305 	lmutex_unlock(&udp->link_lock);
2306 	return (0);
2307 }
2308 
2309 #pragma weak pthread_setconcurrency = _pthread_setconcurrency
2310 int
2311 _pthread_setconcurrency(int new_level)
2312 {
2313 	if (new_level < 0)
2314 		return (EINVAL);
2315 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2316 		return (EAGAIN);
2317 	pthread_concurrency = new_level;
2318 	return (0);
2319 }
2320 
2321 #pragma weak thr_min_stack = _thr_min_stack
2322 #pragma weak __pthread_min_stack = _thr_min_stack
2323 size_t
2324 _thr_min_stack(void)
2325 {
2326 	return (MINSTACK);
2327 }
2328 
2329 int
2330 __nthreads(void)
2331 {
2332 	return (curthread->ul_uberdata->nthreads);
2333 }
2334 
2335 /*
2336  * XXX
2337  * The remainder of this file implements the private interfaces to java for
2338  * garbage collection.  It is no longer used, at least by java 1.2.
2339  * It can all go away once all old JVMs have disappeared.
2340  */
2341 
2342 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2343 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2344 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2345 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2346 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2347 
2348 /*
2349  * Get the available register state for the target thread.
2350  * Return non-volatile registers: TRS_NONVOLATILE
2351  */
2352 #pragma weak thr_getstate = _thr_getstate
2353 int
2354 _thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2355 {
2356 	ulwp_t *self = curthread;
2357 	uberdata_t *udp = self->ul_uberdata;
2358 	ulwp_t **ulwpp;
2359 	ulwp_t *ulwp;
2360 	int error = 0;
2361 	int trs_flag = TRS_LWPID;
2362 
2363 	if (tid == 0 || self->ul_lwpid == tid) {
2364 		ulwp = self;
2365 		ulwp_lock(ulwp, udp);
2366 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2367 		ulwp = *ulwpp;
2368 	} else {
2369 		if (flag)
2370 			*flag = TRS_INVALID;
2371 		return (ESRCH);
2372 	}
2373 
2374 	if (ulwp->ul_dead) {
2375 		trs_flag = TRS_INVALID;
2376 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2377 		error = EINVAL;
2378 		trs_flag = TRS_INVALID;
2379 	} else if (ulwp->ul_stop) {
2380 		trs_flag = TRS_NONVOLATILE;
2381 		getgregs(ulwp, rs);
2382 	}
2383 
2384 	if (flag)
2385 		*flag = trs_flag;
2386 	if (lwp)
2387 		*lwp = tid;
2388 	if (ss != NULL)
2389 		(void) _thrp_stksegment(ulwp, ss);
2390 
2391 	ulwp_unlock(ulwp, udp);
2392 	return (error);
2393 }
2394 
2395 /*
2396  * Set the appropriate register state for the target thread.
2397  * This is not used by java.  It exists solely for the MSTC test suite.
2398  */
2399 #pragma weak thr_setstate = _thr_setstate
2400 int
2401 _thr_setstate(thread_t tid, int flag, gregset_t rs)
2402 {
2403 	uberdata_t *udp = curthread->ul_uberdata;
2404 	ulwp_t *ulwp;
2405 	int error = 0;
2406 
2407 	if ((ulwp = find_lwp(tid)) == NULL)
2408 		return (ESRCH);
2409 
2410 	if (!ulwp->ul_stop && !suspendedallmutators)
2411 		error = EINVAL;
2412 	else if (rs != NULL) {
2413 		switch (flag) {
2414 		case TRS_NONVOLATILE:
2415 			/* do /proc stuff here? */
2416 			if (ulwp->ul_stop)
2417 				setgregs(ulwp, rs);
2418 			else
2419 				error = EINVAL;
2420 			break;
2421 		case TRS_LWPID:		/* do /proc stuff here? */
2422 		default:
2423 			error = EINVAL;
2424 			break;
2425 		}
2426 	}
2427 
2428 	ulwp_unlock(ulwp, udp);
2429 	return (error);
2430 }
2431 
2432 int
2433 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2434 {
2435 	extern ssize_t _pread(int, void *, size_t, off_t);
2436 	char buf[100];
2437 	int fd;
2438 
2439 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2440 	(void) strcpy(buf, "/proc/self/lwp/");
2441 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2442 	(void) strcat(buf, "/lwpstatus");
2443 	if ((fd = _open(buf, O_RDONLY, 0)) >= 0) {
2444 		while (_pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2445 			if (sp->pr_flags & PR_STOPPED) {
2446 				(void) _close(fd);
2447 				return (0);
2448 			}
2449 			lwp_yield();	/* give him a chance to stop */
2450 		}
2451 		(void) _close(fd);
2452 	}
2453 	return (-1);
2454 }
2455 
2456 int
2457 putlwpregs(thread_t tid, prgregset_t prp)
2458 {
2459 	extern ssize_t _writev(int, const struct iovec *, int);
2460 	char buf[100];
2461 	int fd;
2462 	long dstop_sreg[2];
2463 	long run_null[2];
2464 	iovec_t iov[3];
2465 
2466 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2467 	(void) strcpy(buf, "/proc/self/lwp/");
2468 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2469 	(void) strcat(buf, "/lwpctl");
2470 	if ((fd = _open(buf, O_WRONLY, 0)) >= 0) {
2471 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2472 		dstop_sreg[1] = PCSREG;		/* set the registers */
2473 		iov[0].iov_base = (caddr_t)dstop_sreg;
2474 		iov[0].iov_len = sizeof (dstop_sreg);
2475 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2476 		iov[1].iov_len = sizeof (prgregset_t);
2477 		run_null[0] = PCRUN;		/* make it runnable again */
2478 		run_null[1] = 0;
2479 		iov[2].iov_base = (caddr_t)run_null;
2480 		iov[2].iov_len = sizeof (run_null);
2481 		if (_writev(fd, iov, 3) >= 0) {
2482 			(void) _close(fd);
2483 			return (0);
2484 		}
2485 		(void) _close(fd);
2486 	}
2487 	return (-1);
2488 }
2489 
2490 static ulong_t
2491 gettsp_slow(thread_t tid)
2492 {
2493 	char buf[100];
2494 	struct lwpstatus status;
2495 
2496 	if (getlwpstatus(tid, &status) != 0) {
2497 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2498 		(void) strcpy(buf, "__gettsp(");
2499 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2500 		(void) strcat(buf, "): can't read lwpstatus");
2501 		thr_panic(buf);
2502 	}
2503 	return (status.pr_reg[R_SP]);
2504 }
2505 
2506 ulong_t
2507 __gettsp(thread_t tid)
2508 {
2509 	uberdata_t *udp = curthread->ul_uberdata;
2510 	ulwp_t *ulwp;
2511 	ulong_t result;
2512 
2513 	if ((ulwp = find_lwp(tid)) == NULL)
2514 		return (0);
2515 
2516 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2517 		ulwp_unlock(ulwp, udp);
2518 		return (result);
2519 	}
2520 
2521 	result = gettsp_slow(tid);
2522 	ulwp_unlock(ulwp, udp);
2523 	return (result);
2524 }
2525 
2526 /*
2527  * This tells java stack walkers how to find the ucontext
2528  * structure passed to signal handlers.
2529  */
2530 #pragma weak thr_sighndlrinfo = _thr_sighndlrinfo
2531 void
2532 _thr_sighndlrinfo(void (**func)(), int *funcsize)
2533 {
2534 	*func = &__sighndlr;
2535 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2536 }
2537 
2538 /*
2539  * Mark a thread a mutator or reset a mutator to being a default,
2540  * non-mutator thread.
2541  */
2542 #pragma weak thr_setmutator = _thr_setmutator
2543 int
2544 _thr_setmutator(thread_t tid, int enabled)
2545 {
2546 	ulwp_t *self = curthread;
2547 	uberdata_t *udp = self->ul_uberdata;
2548 	ulwp_t *ulwp;
2549 	int error;
2550 
2551 	enabled = enabled?1:0;
2552 top:
2553 	if (tid == 0) {
2554 		ulwp = self;
2555 		ulwp_lock(ulwp, udp);
2556 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2557 		return (ESRCH);
2558 	}
2559 
2560 	/*
2561 	 * The target thread should be the caller itself or a suspended thread.
2562 	 * This prevents the target from also changing its ul_mutator field.
2563 	 */
2564 	error = 0;
2565 	if (ulwp != self && !ulwp->ul_stop && enabled)
2566 		error = EINVAL;
2567 	else if (ulwp->ul_mutator != enabled) {
2568 		lmutex_lock(&mutatorslock);
2569 		if (mutatorsbarrier) {
2570 			ulwp_unlock(ulwp, udp);
2571 			while (mutatorsbarrier)
2572 				(void) _cond_wait(&mutatorscv, &mutatorslock);
2573 			lmutex_unlock(&mutatorslock);
2574 			goto top;
2575 		}
2576 		ulwp->ul_mutator = enabled;
2577 		lmutex_unlock(&mutatorslock);
2578 	}
2579 
2580 	ulwp_unlock(ulwp, udp);
2581 	return (error);
2582 }
2583 
2584 /*
2585  * Establish a barrier against new mutators.  Any non-mutator trying
2586  * to become a mutator is suspended until the barrier is removed.
2587  */
2588 #pragma weak thr_mutators_barrier = _thr_mutators_barrier
2589 void
2590 _thr_mutators_barrier(int enabled)
2591 {
2592 	int oldvalue;
2593 
2594 	lmutex_lock(&mutatorslock);
2595 
2596 	/*
2597 	 * Wait if trying to set the barrier while it is already set.
2598 	 */
2599 	while (mutatorsbarrier && enabled)
2600 		(void) _cond_wait(&mutatorscv, &mutatorslock);
2601 
2602 	oldvalue = mutatorsbarrier;
2603 	mutatorsbarrier = enabled;
2604 	/*
2605 	 * Wakeup any blocked non-mutators when barrier is removed.
2606 	 */
2607 	if (oldvalue && !enabled)
2608 		(void) cond_broadcast_internal(&mutatorscv);
2609 	lmutex_unlock(&mutatorslock);
2610 }
2611 
2612 /*
2613  * Suspend the set of all mutators except for the caller.  The list
2614  * of actively running threads is searched and only the mutators
2615  * in this list are suspended.  Actively running non-mutators remain
2616  * running.  Any other thread is suspended.
2617  */
2618 #pragma weak thr_suspend_allmutators = _thr_suspend_allmutators
2619 int
2620 _thr_suspend_allmutators(void)
2621 {
2622 	ulwp_t *self = curthread;
2623 	uberdata_t *udp = self->ul_uberdata;
2624 	ulwp_t *ulwp;
2625 	int link_dropped;
2626 
2627 	/*
2628 	 * We single-thread the entire thread suspend/continue mechanism.
2629 	 */
2630 	(void) fork_lock_enter(NULL);
2631 
2632 top:
2633 	lmutex_lock(&udp->link_lock);
2634 
2635 	if (suspendingallmutators || suspendedallmutators) {
2636 		lmutex_unlock(&udp->link_lock);
2637 		fork_lock_exit();
2638 		return (EINVAL);
2639 	}
2640 	suspendingallmutators = 1;
2641 
2642 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2643 		ulwp_lock(ulwp, udp);
2644 		if (!ulwp->ul_mutator) {
2645 			ulwp_unlock(ulwp, udp);
2646 		} else if (ulwp->ul_stop) {	/* already stopped */
2647 			ulwp->ul_stop |= TSTP_MUTATOR;
2648 			ulwp_broadcast(ulwp);
2649 			ulwp_unlock(ulwp, udp);
2650 		} else {
2651 			/*
2652 			 * Move the stopped lwp out of a critical section.
2653 			 */
2654 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2655 			    link_dropped) {
2656 				suspendingallmutators = 0;
2657 				goto top;
2658 			}
2659 		}
2660 	}
2661 
2662 	suspendedallmutators = 1;
2663 	suspendingallmutators = 0;
2664 	lmutex_unlock(&udp->link_lock);
2665 	fork_lock_exit();
2666 	return (0);
2667 }
2668 
2669 /*
2670  * Suspend the target mutator.  The caller is permitted to suspend
2671  * itself.  If a mutator barrier is enabled, the caller will suspend
2672  * itself as though it had been suspended by thr_suspend_allmutators().
2673  * When the barrier is removed, this thread will be resumed.  Any
2674  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2675  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2676  */
2677 #pragma weak thr_suspend_mutator = _thr_suspend_mutator
2678 int
2679 _thr_suspend_mutator(thread_t tid)
2680 {
2681 	if (tid == 0)
2682 		tid = curthread->ul_lwpid;
2683 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2684 }
2685 
2686 /*
2687  * Resume the set of all suspended mutators.
2688  */
2689 #pragma weak thr_continue_allmutators = _thr_continue_allmutators
2690 int
2691 _thr_continue_allmutators()
2692 {
2693 	ulwp_t *self = curthread;
2694 	uberdata_t *udp = self->ul_uberdata;
2695 	ulwp_t *ulwp;
2696 
2697 	/*
2698 	 * We single-thread the entire thread suspend/continue mechanism.
2699 	 */
2700 	(void) fork_lock_enter(NULL);
2701 
2702 	lmutex_lock(&udp->link_lock);
2703 	if (!suspendedallmutators) {
2704 		lmutex_unlock(&udp->link_lock);
2705 		fork_lock_exit();
2706 		return (EINVAL);
2707 	}
2708 	suspendedallmutators = 0;
2709 
2710 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2711 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2712 		lmutex_lock(mp);
2713 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2714 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2715 			ulwp_broadcast(ulwp);
2716 			if (!ulwp->ul_stop)
2717 				force_continue(ulwp);
2718 		}
2719 		lmutex_unlock(mp);
2720 	}
2721 
2722 	lmutex_unlock(&udp->link_lock);
2723 	fork_lock_exit();
2724 	return (0);
2725 }
2726 
2727 /*
2728  * Resume a suspended mutator.
2729  */
2730 #pragma weak thr_continue_mutator = _thr_continue_mutator
2731 int
2732 _thr_continue_mutator(thread_t tid)
2733 {
2734 	return (_thrp_continue(tid, TSTP_MUTATOR));
2735 }
2736 
2737 #pragma weak thr_wait_mutator = _thr_wait_mutator
2738 int
2739 _thr_wait_mutator(thread_t tid, int dontwait)
2740 {
2741 	uberdata_t *udp = curthread->ul_uberdata;
2742 	ulwp_t *ulwp;
2743 	int error = 0;
2744 
2745 top:
2746 	if ((ulwp = find_lwp(tid)) == NULL)
2747 		return (ESRCH);
2748 
2749 	if (!ulwp->ul_mutator)
2750 		error = EINVAL;
2751 	else if (dontwait) {
2752 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2753 			error = EWOULDBLOCK;
2754 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2755 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2756 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2757 
2758 		(void) _cond_wait(cvp, mp);
2759 		(void) lmutex_unlock(mp);
2760 		goto top;
2761 	}
2762 
2763 	ulwp_unlock(ulwp, udp);
2764 	return (error);
2765 }
2766 
2767 /* PROBE_SUPPORT begin */
2768 
2769 void
2770 thr_probe_setup(void *data)
2771 {
2772 	curthread->ul_tpdp = data;
2773 }
2774 
2775 static void *
2776 _thread_probe_getfunc()
2777 {
2778 	return (curthread->ul_tpdp);
2779 }
2780 
2781 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2782 
2783 /* ARGSUSED */
2784 void
2785 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2786 {
2787 	/* never called */
2788 }
2789 
2790 /* ARGSUSED */
2791 void
2792 _resume_ret(ulwp_t *oldlwp)
2793 {
2794 	/* never called */
2795 }
2796 
2797 /* PROBE_SUPPORT end */
2798