xref: /titanic_41/usr/src/lib/libc/port/threads/thr.c (revision 7a1306a70fee0e017a445bde1dcfd1997f691cf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <procfs.h>
32 #include <sys/uio.h>
33 #include <ctype.h>
34 
35 #undef errno
36 extern int errno;
37 
38 int __threaded = 0;	/* zero until first thr_create() */
39 
40 /*
41  * thr_concurrency and pthread_concurrency are not used by the library.
42  * They exist solely to hold and return the values set by calls to
43  * thr_setconcurrency() and pthread_setconcurrency().
44  * Because thr_concurrency is affected by the THR_NEW_LWP flag
45  * to thr_create(), thr_concurrency is protected by link_lock.
46  */
47 static	int	thr_concurrency = 1;
48 static	int	pthread_concurrency;
49 
50 size_t	_lpagesize;
51 
52 #define	HASHTBLSZ	1024	/* must be a power of two */
53 #define	TIDHASH(tid, udp)	(tid & (udp)->hash_mask)
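/*
 * Illustrative sketch (not in the original source): after finish_init()
 * allocates the full table, hash_mask is HASHTBLSZ - 1 (1023), so a
 * thread id selects a bucket by its low-order bits:
 *
 *	int ix = TIDHASH(42, udp);	(assumed tid 42 -> bucket 42 & 1023)
 *	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
 *
 * Until then hash_mask is 0 and every tid maps to bucket 0 of
 * init_hash_table, which is sufficient for the single main thread.
 */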
54 
55 /* initial allocation, just enough for one lwp */
56 #pragma align 64(init_hash_table)
57 thr_hash_table_t init_hash_table[1] = {
58 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
59 };
60 
61 extern const Lc_interface rtld_funcs[];
62 
63 /*
64  * The weak version is known to libc_db and mdb.
65  */
66 #pragma weak _uberdata = __uberdata
67 uberdata_t __uberdata = {
68 	{ DEFAULTMUTEX, DEFAULTCV },	/* link_lock */
69 	{ DEFAULTMUTEX, DEFAULTCV },	/* fork_lock */
70 	{ DEFAULTMUTEX, DEFAULTCV },	/* tdb_hash_lock */
71 	{ 0, },			/* tdb_hash_lock_stats */
72 	{ { 0 }, },		/* siguaction[NSIG] */
73 	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
74 	{ DEFAULTMUTEX, NULL, 0 },
75 	{ DEFAULTMUTEX, NULL, 0 },
76 	{ DEFAULTMUTEX, NULL, 0 },
77 	{ DEFAULTMUTEX, NULL, 0 },
78 	{ DEFAULTMUTEX, NULL, 0 },
79 	{ DEFAULTMUTEX, NULL, 0 },
80 	{ DEFAULTMUTEX, NULL, 0 },
81 	{ DEFAULTMUTEX, NULL, 0 },
82 	{ DEFAULTMUTEX, NULL, 0 }},
83 	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
84 	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
85 	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
86 	0,			/* primary_map */
87 	0,			/* bucket_init */
88 	0,			/* pad[0] */
89 	0,			/* pad[1] */
90 	{ 0 },			/* uberflags */
91 	NULL,			/* queue_head */
92 	init_hash_table,	/* thr_hash_table */
93 	1,			/* hash_size: size of the hash table */
94 	0,			/* hash_mask: hash_size - 1 */
95 	NULL,			/* ulwp_one */
96 	NULL,			/* all_lwps */
97 	NULL,			/* all_zombies */
98 	0,			/* nthreads */
99 	0,			/* nzombies */
100 	0,			/* ndaemons */
101 	0,			/* pid */
102 	sigacthandler,		/* sigacthandler */
103 	NULL,			/* lwp_stacks */
104 	NULL,			/* lwp_laststack */
105 	0,			/* nfreestack */
106 	10,			/* thread_stack_cache */
107 	NULL,			/* ulwp_freelist */
108 	NULL,			/* ulwp_lastfree */
109 	NULL,			/* ulwp_replace_free */
110 	NULL,			/* ulwp_replace_last */
111 	NULL,			/* atforklist */
112 	NULL,			/* __tdb_bootstrap */
113 	{			/* tdb */
114 		NULL,		/* tdb_sync_addr_hash */
115 		0,		/* tdb_register_count */
116 		0,		/* tdb_hash_alloc_failed */
117 		NULL,		/* tdb_sync_addr_free */
118 		NULL,		/* tdb_sync_addr_last */
119 		0,		/* tdb_sync_alloc */
120 		{ 0, 0 },	/* tdb_ev_global_mask */
121 		tdb_events,	/* tdb_events array */
122 	},
123 };
124 
125 /*
126  * The weak version is known to libc_db and mdb.
127  */
128 #pragma weak _tdb_bootstrap = __tdb_bootstrap
129 uberdata_t **__tdb_bootstrap = NULL;
130 
131 int	thread_queue_fifo = 4;
132 int	thread_queue_dump = 0;
133 int	thread_cond_wait_defer = 0;
134 int	thread_error_detection = 0;
135 int	thread_async_safe = 0;
136 int	thread_stack_cache = 10;
137 
138 int	thread_door_noreserve = 0;
139 
140 static	ulwp_t	*ulwp_alloc(void);
141 static	void	ulwp_free(ulwp_t *);
142 
143 /*
144  * Insert the lwp into the hash table.
145  */
146 void
147 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
148 {
149 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
150 	udp->thr_hash_table[ix].hash_bucket = ulwp;
151 	ulwp->ul_ix = ix;
152 }
153 
154 void
155 hash_in(ulwp_t *ulwp, uberdata_t *udp)
156 {
157 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
158 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
159 
160 	lmutex_lock(mp);
161 	hash_in_unlocked(ulwp, ix, udp);
162 	lmutex_unlock(mp);
163 }
164 
165 /*
166  * Delete the lwp from the hash table.
167  */
168 void
169 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
170 {
171 	ulwp_t **ulwpp;
172 
173 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
174 	    ulwp != *ulwpp;
175 	    ulwpp = &(*ulwpp)->ul_hash)
176 		;
177 	*ulwpp = ulwp->ul_hash;
178 	ulwp->ul_hash = NULL;
179 	ulwp->ul_ix = -1;
180 }
181 
182 void
183 hash_out(ulwp_t *ulwp, uberdata_t *udp)
184 {
185 	int ix;
186 
187 	if ((ix = ulwp->ul_ix) >= 0) {
188 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
189 
190 		lmutex_lock(mp);
191 		hash_out_unlocked(ulwp, ix, udp);
192 		lmutex_unlock(mp);
193 	}
194 }
195 
196 static void
197 ulwp_clean(ulwp_t *ulwp)
198 {
199 	ulwp->ul_self = NULL;
200 	ulwp->ul_rval = NULL;
201 	ulwp->ul_lwpid = 0;
202 	ulwp->ul_pri = 0;
203 	ulwp->ul_mappedpri = 0;
204 	ulwp->ul_policy = 0;
205 	ulwp->ul_pri_mapped = 0;
206 	ulwp->ul_mutator = 0;
207 	ulwp->ul_pleasestop = 0;
208 	ulwp->ul_stop = 0;
209 	ulwp->ul_dead = 0;
210 	ulwp->ul_unwind = 0;
211 	ulwp->ul_detached = 0;
212 	ulwp->ul_stopping = 0;
213 	ulwp->ul_sp = 0;
214 	ulwp->ul_critical = 0;
215 	ulwp->ul_cancelable = 0;
216 	ulwp->ul_preempt = 0;
217 	ulwp->ul_sigsuspend = 0;
218 	ulwp->ul_cancel_pending = 0;
219 	ulwp->ul_cancel_disabled = 0;
220 	ulwp->ul_cancel_async = 0;
221 	ulwp->ul_save_async = 0;
222 	ulwp->ul_cursig = 0;
223 	ulwp->ul_created = 0;
224 	ulwp->ul_replace = 0;
225 	ulwp->ul_schedctl_called = NULL;
226 	ulwp->ul_errno = 0;
227 	ulwp->ul_errnop = NULL;
228 	ulwp->ul_clnup_hdr = NULL;
229 	ulwp->ul_schedctl = NULL;
230 	ulwp->ul_bindflags = 0;
231 	(void) _private_memset(&ulwp->ul_td_evbuf, 0,
232 		sizeof (ulwp->ul_td_evbuf));
233 	ulwp->ul_td_events_enable = 0;
234 	ulwp->ul_qtype = 0;
235 	ulwp->ul_usropts = 0;
236 	ulwp->ul_startpc = NULL;
237 	ulwp->ul_startarg = NULL;
238 	ulwp->ul_wchan = NULL;
239 	ulwp->ul_link = NULL;
240 	ulwp->ul_sleepq = NULL;
241 	ulwp->ul_mxchain = NULL;
242 	ulwp->ul_epri = 0;
243 	ulwp->ul_emappedpri = 0;
244 	/* PROBE_SUPPORT begin */
245 	ulwp->ul_tpdp = NULL;
246 	/* PROBE_SUPPORT end */
247 	ulwp->ul_siglink = NULL;
248 	(void) _private_memset(ulwp->ul_ftsd, 0,
249 		sizeof (void *) * TSD_NFAST);
250 	ulwp->ul_stsd = NULL;
251 	(void) _private_memset(&ulwp->ul_spinlock, 0,
252 		sizeof (ulwp->ul_spinlock));
253 	ulwp->ul_spin_lock_spin = 0;
254 	ulwp->ul_spin_lock_spin2 = 0;
255 	ulwp->ul_spin_lock_sleep = 0;
256 	ulwp->ul_spin_lock_wakeup = 0;
257 	ulwp->ul_ex_unwind = NULL;
258 }
259 
260 static int stackprot;
261 
262 /*
263  * Answer the question, "Is the lwp in question really dead?"
264  * We must inquire of the operating system to be really sure
265  * because the lwp may have called lwp_exit() but it has not
266  * yet completed the exit.
267  */
268 static int
269 dead_and_buried(ulwp_t *ulwp)
270 {
271 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
272 		return (1);
273 	if (ulwp->ul_dead && ulwp->ul_detached &&
274 	    __lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
275 		ulwp->ul_lwpid = (lwpid_t)(-1);
276 		return (1);
277 	}
278 	return (0);
279 }
280 
281 /*
282  * Attempt to keep the stack cache within the specified cache limit.
283  */
284 static void
285 trim_stack_cache(int cache_limit)
286 {
287 	ulwp_t *self = curthread;
288 	uberdata_t *udp = self->ul_uberdata;
289 	ulwp_t *prev = NULL;
290 	ulwp_t **ulwpp = &udp->lwp_stacks;
291 	ulwp_t *ulwp;
292 
293 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
294 
295 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
296 		if (dead_and_buried(ulwp)) {
297 			*ulwpp = ulwp->ul_next;
298 			if (ulwp == udp->lwp_laststack)
299 				udp->lwp_laststack = prev;
300 			hash_out(ulwp, udp);
301 			udp->nfreestack--;
302 			(void) _private_munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
303 			/*
304 			 * Now put the free ulwp on the ulwp freelist.
305 			 */
306 			ulwp->ul_mapsiz = 0;
307 			ulwp->ul_next = NULL;
308 			if (udp->ulwp_freelist == NULL)
309 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
310 			else {
311 				udp->ulwp_lastfree->ul_next = ulwp;
312 				udp->ulwp_lastfree = ulwp;
313 			}
314 		} else {
315 			prev = ulwp;
316 			ulwpp = &ulwp->ul_next;
317 		}
318 	}
319 }
320 
321 /*
322  * Find an unused stack of the requested size
323  * or create a new stack of the requested size.
324  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
325  * thr_exit() stores 1 in the ul_dead member.
326  * thr_join() stores -1 in the ul_lwpid member.
327  */
328 ulwp_t *
329 find_stack(size_t stksize, size_t guardsize)
330 {
331 	uberdata_t *udp = curthread->ul_uberdata;
332 	size_t mapsize;
333 	ulwp_t *prev;
334 	ulwp_t *ulwp;
335 	ulwp_t **ulwpp;
336 	void *stk;
337 
338 	/*
339 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
340 	 * unless overridden by the system's configuration.
341 	 */
342 	if (stackprot == 0) {	/* do this once */
343 		long lprot = _sysconf(_SC_STACK_PROT);
344 		if (lprot <= 0)
345 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
346 		stackprot = (int)lprot;
347 	}
348 	if (_lpagesize == 0)
349 		_lpagesize = _sysconf(_SC_PAGESIZE);
350 	/*
351 	 * One megabyte stacks by default, but subtract off
352 	 * two pages for the system-created red zones.
353 	 * Round up a non-zero stack size to a pagesize multiple.
354 	 */
355 	if (stksize == 0)
356 		stksize = DEFAULTSTACK - 2 * _lpagesize;
357 	else
358 		stksize = ((stksize + _lpagesize - 1) & -_lpagesize);
359 
360 	/*
361 	 * Round up the mapping size to a multiple of pagesize.
362 	 * Note: mmap() provides at least one page of red zone
363 	 * so we deduct that from the value of guardsize.
364 	 */
365 	if (guardsize != 0)
366 		guardsize = ((guardsize + _lpagesize - 1) & -_lpagesize) -
367 			_lpagesize;
368 	mapsize = stksize + guardsize;
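
	/*
	 * Worked example (illustrative, assuming an 8K page size):
	 * a request for stksize = 5000 and guardsize = 1 rounds stksize
	 * up to 8192; guardsize rounds up to 8192 and then loses one
	 * page for mmap()'s own red zone, leaving 0, so mapsize = 8192.
	 * A zero stksize instead yields DEFAULTSTACK minus two pages.
	 */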
369 
370 	lmutex_lock(&udp->link_lock);
371 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
372 	    (ulwp = *ulwpp) != NULL;
373 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
374 		if (ulwp->ul_mapsiz == mapsize &&
375 		    ulwp->ul_guardsize == guardsize &&
376 		    dead_and_buried(ulwp)) {
377 			/*
378 			 * The previous lwp is gone; reuse the stack.
379 			 * Remove the ulwp from the stack list.
380 			 */
381 			*ulwpp = ulwp->ul_next;
382 			ulwp->ul_next = NULL;
383 			if (ulwp == udp->lwp_laststack)
384 				udp->lwp_laststack = prev;
385 			hash_out(ulwp, udp);
386 			udp->nfreestack--;
387 			lmutex_unlock(&udp->link_lock);
388 			ulwp_clean(ulwp);
389 			return (ulwp);
390 		}
391 	}
392 
393 	/*
394 	 * None of the cached stacks matched our mapping size.
395 	 * Reduce the stack cache to get rid of possibly
396 	 * very old stacks that will never be reused.
397 	 */
398 	if (udp->nfreestack > udp->thread_stack_cache)
399 		trim_stack_cache(udp->thread_stack_cache);
400 	else if (udp->nfreestack > 0)
401 		trim_stack_cache(udp->nfreestack - 1);
402 	lmutex_unlock(&udp->link_lock);
403 
404 	/*
405 	 * Create a new stack.
406 	 */
407 	if ((stk = _private_mmap(NULL, mapsize, stackprot,
408 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
409 		/*
410 		 * We have allocated our stack.  Now allocate the ulwp.
411 		 */
412 		ulwp = ulwp_alloc();
413 		if (ulwp == NULL)
414 			(void) _private_munmap(stk, mapsize);
415 		else {
416 			ulwp->ul_stk = stk;
417 			ulwp->ul_mapsiz = mapsize;
418 			ulwp->ul_guardsize = guardsize;
419 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
420 			ulwp->ul_stksiz = stksize;
421 			ulwp->ul_ix = -1;
422 			if (guardsize)	/* protect the extra red zone */
423 				(void) _private_mprotect(stk,
424 					guardsize, PROT_NONE);
425 		}
426 	}
427 	return (ulwp);
428 }
429 
430 /*
431  * Get a ulwp_t structure from the free list or allocate a new one.
432  * Such ulwp_t's do not have a stack allocated by the library.
433  */
434 static ulwp_t *
435 ulwp_alloc(void)
436 {
437 	ulwp_t *self = curthread;
438 	uberdata_t *udp = self->ul_uberdata;
439 	size_t tls_size;
440 	ulwp_t *prev;
441 	ulwp_t *ulwp;
442 	ulwp_t **ulwpp;
443 	caddr_t data;
444 
445 	lmutex_lock(&udp->link_lock);
446 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
447 	    (ulwp = *ulwpp) != NULL;
448 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
449 		if (dead_and_buried(ulwp)) {
450 			*ulwpp = ulwp->ul_next;
451 			ulwp->ul_next = NULL;
452 			if (ulwp == udp->ulwp_lastfree)
453 				udp->ulwp_lastfree = prev;
454 			hash_out(ulwp, udp);
455 			lmutex_unlock(&udp->link_lock);
456 			ulwp_clean(ulwp);
457 			return (ulwp);
458 		}
459 	}
460 	lmutex_unlock(&udp->link_lock);
461 
462 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
463 	data = lmalloc(sizeof (*ulwp) + tls_size);
464 	if (data != NULL) {
465 		/* LINTED pointer cast may result in improper alignment */
466 		ulwp = (ulwp_t *)(data + tls_size);
467 	}
468 	return (ulwp);
469 }
470 
471 /*
472  * Free a ulwp structure.
473  * If there is an associated stack, put it on the stack list and
474  * munmap() previously freed stacks up to the residual cache limit.
475  * Else put it on the ulwp free list and never call lfree() on it.
476  */
477 static void
478 ulwp_free(ulwp_t *ulwp)
479 {
480 	uberdata_t *udp = curthread->ul_uberdata;
481 
482 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
483 	ulwp->ul_next = NULL;
484 	if (ulwp == udp->ulwp_one)	/* don't reuse the primordial stack */
485 		/*EMPTY*/;
486 	else if (ulwp->ul_mapsiz != 0) {
487 		if (udp->lwp_stacks == NULL)
488 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
489 		else {
490 			udp->lwp_laststack->ul_next = ulwp;
491 			udp->lwp_laststack = ulwp;
492 		}
493 		if (++udp->nfreestack > udp->thread_stack_cache)
494 			trim_stack_cache(udp->thread_stack_cache);
495 	} else {
496 		if (udp->ulwp_freelist == NULL)
497 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
498 		else {
499 			udp->ulwp_lastfree->ul_next = ulwp;
500 			udp->ulwp_lastfree = ulwp;
501 		}
502 	}
503 }
504 
505 /*
506  * Find a named lwp and return a pointer to its hash list location.
507  * On success, returns with the hash lock held.
508  */
509 ulwp_t **
510 find_lwpp(thread_t tid)
511 {
512 	uberdata_t *udp = curthread->ul_uberdata;
513 	int ix = TIDHASH(tid, udp);
514 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
515 	ulwp_t *ulwp;
516 	ulwp_t **ulwpp;
517 
518 	if (tid == 0)
519 		return (NULL);
520 
521 	lmutex_lock(mp);
522 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
523 	    (ulwp = *ulwpp) != NULL;
524 	    ulwpp = &ulwp->ul_hash) {
525 		if (ulwp->ul_lwpid == tid)
526 			return (ulwpp);
527 	}
528 	lmutex_unlock(mp);
529 	return (NULL);
530 }
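
/*
 * Illustrative caller pattern (a sketch, not part of the original code):
 * on success the bucket lock is still held, so the caller examines the
 * ulwp and then drops the lock through ulwp_unlock(), for example:
 *
 *	ulwp_t **ulwpp;
 *	if ((ulwpp = find_lwpp(tid)) != NULL) {
 *		ulwp_t *ulwp = *ulwpp;
 *		int detached = ulwp->ul_detached;
 *		ulwp_unlock(ulwp, udp);
 *	}
 *
 * _thr_detach() below follows this pattern.
 */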
531 
532 /*
533  * Wake up all lwps waiting on this lwp for some reason.
534  */
535 void
536 ulwp_broadcast(ulwp_t *ulwp)
537 {
538 	ulwp_t *self = curthread;
539 	uberdata_t *udp = self->ul_uberdata;
540 
541 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
542 	(void) cond_broadcast_internal(ulwp_condvar(ulwp, udp));
543 }
544 
545 /*
546  * Find a named lwp and return a pointer to it.
547  * Returns with the hash lock held.
548  */
549 ulwp_t *
550 find_lwp(thread_t tid)
551 {
552 	ulwp_t *self = curthread;
553 	uberdata_t *udp = self->ul_uberdata;
554 	ulwp_t *ulwp = NULL;
555 	ulwp_t **ulwpp;
556 
557 	if (self->ul_lwpid == tid) {
558 		ulwp = self;
559 		ulwp_lock(ulwp, udp);
560 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
561 		ulwp = *ulwpp;
562 	}
563 
564 	if (ulwp && ulwp->ul_dead) {
565 		ulwp_unlock(ulwp, udp);
566 		ulwp = NULL;
567 	}
568 
569 	return (ulwp);
570 }
571 
572 int
573 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
574 	long flags, thread_t *new_thread, pri_t priority, int policy,
575 	size_t guardsize)
576 {
577 	ulwp_t *self = curthread;
578 	uberdata_t *udp = self->ul_uberdata;
579 	ucontext_t uc;
580 	uint_t lwp_flags;
581 	thread_t tid;
582 	int error = 0;
583 	ulwp_t *ulwp;
584 
585 	/*
586 	 * Enforce the restriction of not creating any threads
587 	 * until the primary link map has been initialized.
588 	 * Also, disallow thread creation to a child of vfork().
589 	 */
590 	if (!self->ul_primarymap || self->ul_vfork)
591 		return (ENOTSUP);
592 
593 	if (udp->hash_size == 1)
594 		finish_init();
595 
596 	if (((stk || stksize) && stksize < MINSTACK) ||
597 	    priority < THREAD_MIN_PRIORITY || priority > THREAD_MAX_PRIORITY)
598 		return (EINVAL);
599 
600 	if (stk == NULL) {
601 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
602 			return (ENOMEM);
603 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
604 	} else {
605 		/* initialize the private stack */
606 		if ((ulwp = ulwp_alloc()) == NULL)
607 			return (ENOMEM);
608 		ulwp->ul_stk = stk;
609 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
610 		ulwp->ul_stksiz = stksize;
611 		ulwp->ul_ix = -1;
612 	}
613 	ulwp->ul_errnop = &ulwp->ul_errno;
614 
615 	lwp_flags = LWP_SUSPENDED;
616 	if (flags & (THR_DETACHED|THR_DAEMON)) {
617 		flags |= THR_DETACHED;
618 		lwp_flags |= LWP_DETACHED;
619 	}
620 	if (flags & THR_DAEMON)
621 		lwp_flags |= LWP_DAEMON;
622 
623 	/* creating a thread: enforce mt-correctness in _mutex_lock() */
624 	self->ul_async_safe = 1;
625 
626 	/* per-thread copies of global variables, for speed */
627 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
628 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
629 	ulwp->ul_error_detection = self->ul_error_detection;
630 	ulwp->ul_async_safe = self->ul_async_safe;
631 	ulwp->ul_max_spinners = self->ul_max_spinners;
632 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
633 	ulwp->ul_release_spin = self->ul_release_spin;
634 	ulwp->ul_queue_spin = self->ul_queue_spin;
635 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
636 
637 	ulwp->ul_primarymap = self->ul_primarymap;
638 	ulwp->ul_self = ulwp;
639 	ulwp->ul_uberdata = udp;
640 
641 	/* debugger support */
642 	ulwp->ul_usropts = flags;
643 
644 #ifdef __sparc
645 	/*
646 	 * We cache several instructions in the thread structure for use
647 	 * by the fasttrap DTrace provider. When changing this, read the
648 	 * comment in fasttrap.h for all the other places that must
649 	 * be changed.
650 	 */
651 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
652 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
653 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
654 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
655 #endif
656 
657 	ulwp->ul_startpc = func;
658 	ulwp->ul_startarg = arg;
659 	_fpinherit(ulwp);
660 	/*
661 	 * Defer signals on the new thread until its TLS constructors
662 	 * have been called.  _thr_setup() will call sigon() after
663 	 * it has called tls_setup().
664 	 */
665 	ulwp->ul_sigdefer = 1;
666 
667 	if (setup_context(&uc, _thr_setup, ulwp,
668 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize) != 0)
669 		error = EAGAIN;
670 
671 	/*
672 	 * Call enter_critical() to avoid being suspended until we
673 	 * have linked the new thread into the proper lists.
674 	 * This is necessary because forkall() and fork1() must
675 	 * suspend all threads and they must see a complete list.
676 	 */
677 	enter_critical(self);
678 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
679 	if (error != 0 ||
680 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
681 		exit_critical(self);
682 		ulwp->ul_lwpid = (lwpid_t)(-1);
683 		ulwp->ul_dead = 1;
684 		ulwp->ul_detached = 1;
685 		lmutex_lock(&udp->link_lock);
686 		ulwp_free(ulwp);
687 		lmutex_unlock(&udp->link_lock);
688 		return (error);
689 	}
690 	self->ul_nocancel = 0;	/* cancellation is now possible */
691 	ulwp->ul_nocancel = 0;
692 	udp->uberflags.uf_mt = 1;
693 	if (new_thread)
694 		*new_thread = tid;
695 	if (flags & THR_DETACHED)
696 		ulwp->ul_detached = 1;
697 	ulwp->ul_lwpid = tid;
698 	ulwp->ul_stop = TSTP_REGULAR;
699 	ulwp->ul_created = 1;
700 	ulwp->ul_policy = policy;
701 	ulwp->ul_pri = priority;
702 
703 	lmutex_lock(&udp->link_lock);
704 	ulwp->ul_forw = udp->all_lwps;
705 	ulwp->ul_back = udp->all_lwps->ul_back;
706 	ulwp->ul_back->ul_forw = ulwp;
707 	ulwp->ul_forw->ul_back = ulwp;
708 	hash_in(ulwp, udp);
709 	udp->nthreads++;
710 	if (flags & THR_DAEMON)
711 		udp->ndaemons++;
712 	if (flags & THR_NEW_LWP)
713 		thr_concurrency++;
714 	__threaded = 1;		/* inform stdio */
715 	lmutex_unlock(&udp->link_lock);
716 
717 	if (__td_event_report(self, TD_CREATE, udp)) {
718 		self->ul_td_evbuf.eventnum = TD_CREATE;
719 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
720 		tdb_event(TD_CREATE, udp);
721 	}
722 	if (!(flags & THR_SUSPENDED)) {
723 		ulwp->ul_created = 0;
724 		(void) _thrp_continue(tid, TSTP_REGULAR);
725 	}
726 
727 	exit_critical(self);
728 	return (0);
729 }
730 
731 #pragma weak thr_create = _thr_create
732 int
733 _thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
734 	long flags, thread_t *new_thread)
735 {
736 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread,
737 		curthread->ul_pri, curthread->ul_policy, 0));
738 }
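
/*
 * Illustrative application-level usage of the interfaces above (a hedged
 * sketch, not part of libc itself):
 *
 *	static void *worker(void *arg) { return (arg); }
 *
 *	thread_t tid;
 *	void *rval;
 *	if (thr_create(NULL, 0, worker, (void *)1, 0, &tid) == 0)
 *		(void) thr_join(tid, NULL, &rval);
 *
 * A NULL stack with zero stksize lets find_stack() supply a cached or
 * freshly mmap()ed stack of the default size; THR_DETACHED or THR_DAEMON
 * in the flags argument would make the thread unjoinable.
 */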
739 
740 /*
741  * A special cancellation cleanup hook for DCE.
742  * cleanuphndlr, when it is not NULL, will contain a callback
743  * function to be called before a thread is terminated in
744  * _thr_exit() as a result of being cancelled.
745  */
746 static void (*cleanuphndlr)(void) = NULL;
747 
748 /*
749  * _pthread_setcleanupinit: sets the cleanup hook.
750  */
751 int
752 _pthread_setcleanupinit(void (*func)(void))
753 {
754 	cleanuphndlr = func;
755 	return (0);
756 }
757 
758 void
759 _thrp_exit()
760 {
761 	ulwp_t *self = curthread;
762 	uberdata_t *udp = self->ul_uberdata;
763 	ulwp_t *replace = NULL;
764 
765 	if (__td_event_report(self, TD_DEATH, udp)) {
766 		self->ul_td_evbuf.eventnum = TD_DEATH;
767 		tdb_event(TD_DEATH, udp);
768 	}
769 
770 	ASSERT(self->ul_sigdefer != 0);
771 
772 	lmutex_lock(&udp->link_lock);
773 	udp->nthreads--;
774 	if (self->ul_usropts & THR_NEW_LWP)
775 		thr_concurrency--;
776 	if (self->ul_usropts & THR_DAEMON)
777 		udp->ndaemons--;
778 	else if (udp->nthreads == udp->ndaemons) {
779 		/*
780 		 * We are the last non-daemon thread exiting.
781 		 * Exit the process.  We retain our TSD and TLS so
782 		 * that atexit() application functions can use them.
783 		 */
784 		lmutex_unlock(&udp->link_lock);
785 		exit(0);
786 		thr_panic("_thrp_exit(): exit(0) returned");
787 	}
788 	lmutex_unlock(&udp->link_lock);
789 
790 	tsd_exit();	/* deallocate thread-specific data */
791 	tls_exit();	/* deallocate thread-local storage */
792 
793 	/* block all signals to finish exiting */
794 	block_all_signals(self);
795 	/* also prevent ourself from being suspended */
796 	enter_critical(self);
797 	rwl_free(self);
798 	lmutex_lock(&udp->link_lock);
799 	ulwp_free(self);
800 	(void) ulwp_lock(self, udp);
801 
802 	if (self->ul_mapsiz && !self->ul_detached) {
803 		/*
804 		 * We want to free the stack for reuse but must keep
805 		 * the ulwp_t struct for the benefit of thr_join().
806 		 * For this purpose we allocate a replacement ulwp_t.
807 		 */
808 		if ((replace = udp->ulwp_replace_free) == NULL)
809 			replace = lmalloc(REPLACEMENT_SIZE);
810 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
811 			udp->ulwp_replace_last = NULL;
812 	}
813 
814 	if (udp->all_lwps == self)
815 		udp->all_lwps = self->ul_forw;
816 	if (udp->all_lwps == self)
817 		udp->all_lwps = NULL;
818 	else {
819 		self->ul_forw->ul_back = self->ul_back;
820 		self->ul_back->ul_forw = self->ul_forw;
821 	}
822 	self->ul_forw = self->ul_back = NULL;
823 	/* collect queue lock statistics before marking ourself dead */
824 	record_spin_locks(self);
825 	self->ul_dead = 1;
826 	self->ul_pleasestop = 0;
827 	if (replace != NULL) {
828 		int ix = self->ul_ix;		/* the hash index */
829 		(void) _private_memcpy(replace, self, REPLACEMENT_SIZE);
830 		replace->ul_self = replace;
831 		replace->ul_gs = 0;		/* clone does not carry %gs */
832 		replace->ul_next = NULL;	/* clone not on stack list */
833 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
834 		replace->ul_replace = 1;	/* requires clone to be freed */
835 		hash_out_unlocked(self, ix, udp);
836 		hash_in_unlocked(replace, ix, udp);
837 		ASSERT(!(self->ul_detached));
838 		self->ul_detached = 1;		/* this frees the stack */
839 		self->ul_schedctl = NULL;
840 		self->ul_schedctl_called = &udp->uberflags;
841 		set_curthread(self = replace);
842 		/*
843 		 * Having just changed the address of curthread, we
844 		 * must reset the ownership of the locks we hold so
845 		 * that assertions will not fire when we release them.
846 		 */
847 		udp->link_lock.mutex_owner = (uintptr_t)self;
848 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
849 		/*
850 		 * NOTE:
851 		 * On i386, %gs still references the original, not the
852 		 * replacement, ulwp structure.  Fetching the replacement
853 		 * curthread pointer via %gs:0 works correctly since the
854 		 * original ulwp structure will not be reallocated until
855 		 * this lwp has completed its lwp_exit() system call (see
856 		 * dead_and_buried()), but from here on out, we must make
857 		 * no references to %gs:<offset> other than %gs:0.
858 		 */
859 	}
860 	/*
861 	 * Put non-detached terminated threads in the all_zombies list.
862 	 */
863 	if (!self->ul_detached) {
864 		udp->nzombies++;
865 		if (udp->all_zombies == NULL) {
866 			ASSERT(udp->nzombies == 1);
867 			udp->all_zombies = self->ul_forw = self->ul_back = self;
868 		} else {
869 			self->ul_forw = udp->all_zombies;
870 			self->ul_back = udp->all_zombies->ul_back;
871 			self->ul_back->ul_forw = self;
872 			self->ul_forw->ul_back = self;
873 		}
874 	}
875 	/*
876 	 * Notify everyone waiting for this thread.
877 	 */
878 	ulwp_broadcast(self);
879 	(void) ulwp_unlock(self, udp);
880 	/*
881 	 * Prevent any more references to the schedctl data.
882 	 * We are exiting and continue_fork() may not find us.
883 	 * Do this just before dropping link_lock, since fork
884 	 * serializes on link_lock.
885 	 */
886 	self->ul_schedctl = NULL;
887 	self->ul_schedctl_called = &udp->uberflags;
888 	lmutex_unlock(&udp->link_lock);
889 
890 	ASSERT(self->ul_critical == 1);
891 	ASSERT(self->ul_preempt == 0);
892 	_lwp_terminate();	/* never returns */
893 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
894 }
895 
896 void
897 collect_queue_statistics()
898 {
899 	uberdata_t *udp = curthread->ul_uberdata;
900 	ulwp_t *ulwp;
901 
902 	if (thread_queue_dump) {
903 		lmutex_lock(&udp->link_lock);
904 		if ((ulwp = udp->all_lwps) != NULL) {
905 			do {
906 				record_spin_locks(ulwp);
907 			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
908 		}
909 		lmutex_unlock(&udp->link_lock);
910 	}
911 }
912 
913 void
914 _thr_exit_common(void *status, int unwind)
915 {
916 	ulwp_t *self = curthread;
917 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
918 
919 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
920 
921 	/*
922 	 * Disable cancellation and call the special DCE cancellation
923 	 * cleanup hook if it is enabled.  Do nothing else before calling
924 	 * the DCE cancellation cleanup hook; it may call longjmp() and
925 	 * never return here.
926 	 */
927 	self->ul_cancel_disabled = 1;
928 	self->ul_cancel_async = 0;
929 	self->ul_save_async = 0;
930 	self->ul_cancelable = 0;
931 	self->ul_cancel_pending = 0;
932 	if (cancelled && cleanuphndlr != NULL)
933 		(*cleanuphndlr)();
934 
935 	/*
936 	 * Block application signals while we are exiting.
937 	 * We call out to C++, TSD, and TLS destructors while exiting
938 	 * and these are application-defined, so we cannot be assured
939 	 * that they won't reset the signal mask.  We use sigoff() to
940 	 * defer any signals that may be received as a result of this
941 	 * bad behavior.  Such signals will be lost to the process
942 	 * when the thread finishes exiting.
943 	 */
944 	(void) _thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
945 	sigoff(self);
946 
947 	self->ul_rval = status;
948 
949 	/*
950 	 * If thr_exit is being called from the places where
951 	 * C++ destructors are to be called such as cancellation
952 	 * points, then set this flag. It is checked in _t_cancel()
953 	 * to decide whether _ex_unwind() is to be called or not.
954 	 */
955 	if (unwind)
956 		self->ul_unwind = 1;
957 
958 	/*
959 	 * _thrp_unwind() will eventually call _thrp_exit().
960 	 * It never returns.
961 	 */
962 	_thrp_unwind(NULL);
963 	thr_panic("_thr_exit_common(): _thrp_unwind() returned");
964 }
965 
966 /*
967  * Called when a thread returns from its start function.
968  * We are at the top of the stack; no unwinding is necessary.
969  */
970 void
971 _thr_terminate(void *status)
972 {
973 	_thr_exit_common(status, 0);
974 }
975 
976 #pragma weak thr_exit = _thr_exit
977 #pragma weak pthread_exit = _thr_exit
978 #pragma weak _pthread_exit = _thr_exit
979 void
980 _thr_exit(void *status)
981 {
982 	_thr_exit_common(status, 1);
983 }
984 
985 int
986 _thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
987 {
988 	uberdata_t *udp = curthread->ul_uberdata;
989 	mutex_t *mp;
990 	void *rval;
991 	thread_t found;
992 	ulwp_t *ulwp;
993 	ulwp_t **ulwpp;
994 	int replace;
995 	int error;
996 
997 	if (do_cancel)
998 		error = lwp_wait(tid, &found);
999 	else {
1000 		while ((error = __lwp_wait(tid, &found)) == EINTR)
1001 			;
1002 	}
1003 	if (error)
1004 		return (error);
1005 
1006 	/*
1007 	 * We must hold link_lock to avoid a race condition with find_stack().
1008 	 */
1009 	lmutex_lock(&udp->link_lock);
1010 	if ((ulwpp = find_lwpp(found)) == NULL) {
1011 		/*
1012 		 * lwp_wait() found an lwp that the library doesn't know
1013 		 * about.  It must have been created with _lwp_create().
1014 		 * Just return its lwpid; we can't know its status.
1015 		 */
1016 		lmutex_unlock(&udp->link_lock);
1017 		rval = NULL;
1018 	} else {
1019 		/*
1020 		 * Remove ulwp from the hash table.
1021 		 */
1022 		ulwp = *ulwpp;
1023 		*ulwpp = ulwp->ul_hash;
1024 		ulwp->ul_hash = NULL;
1025 		/*
1026 		 * Remove ulwp from all_zombies list.
1027 		 */
1028 		ASSERT(udp->nzombies >= 1);
1029 		if (udp->all_zombies == ulwp)
1030 			udp->all_zombies = ulwp->ul_forw;
1031 		if (udp->all_zombies == ulwp)
1032 			udp->all_zombies = NULL;
1033 		else {
1034 			ulwp->ul_forw->ul_back = ulwp->ul_back;
1035 			ulwp->ul_back->ul_forw = ulwp->ul_forw;
1036 		}
1037 		ulwp->ul_forw = ulwp->ul_back = NULL;
1038 		udp->nzombies--;
1039 		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
1040 			!(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
1041 		/*
1042 		 * We can't call ulwp_unlock(ulwp) after we set
1043 		 * ulwp->ul_ix = -1 so we have to get a pointer to the
1044 		 * ulwp's hash table mutex now in order to unlock it below.
1045 		 */
1046 		mp = ulwp_mutex(ulwp, udp);
1047 		ulwp->ul_lwpid = (lwpid_t)(-1);
1048 		ulwp->ul_ix = -1;
1049 		rval = ulwp->ul_rval;
1050 		replace = ulwp->ul_replace;
1051 		lmutex_unlock(mp);
1052 		if (replace) {
1053 			ulwp->ul_next = NULL;
1054 			if (udp->ulwp_replace_free == NULL)
1055 				udp->ulwp_replace_free =
1056 					udp->ulwp_replace_last = ulwp;
1057 			else {
1058 				udp->ulwp_replace_last->ul_next = ulwp;
1059 				udp->ulwp_replace_last = ulwp;
1060 			}
1061 		}
1062 		lmutex_unlock(&udp->link_lock);
1063 	}
1064 
1065 	if (departed != NULL)
1066 		*departed = found;
1067 	if (status != NULL)
1068 		*status = rval;
1069 	return (0);
1070 }
1071 
1072 #pragma weak thr_join = _thr_join
1073 int
1074 _thr_join(thread_t tid, thread_t *departed, void **status)
1075 {
1076 	int error = _thrp_join(tid, departed, status, 1);
1077 	return ((error == EINVAL)? ESRCH : error);
1078 }
1079 
1080 /*
1081  * pthread_join() differs from Solaris thr_join():
1082  * It does not return the departed thread's id
1083  * and hence does not have a "departed" argument.
1084  * It returns EINVAL if tid refers to a detached thread.
1085  */
1086 #pragma weak pthread_join = _pthread_join
1087 int
1088 _pthread_join(pthread_t tid, void **status)
1089 {
1090 	return ((tid == 0)? ESRCH : _thrp_join(tid, NULL, status, 1));
1091 }
1092 
1093 #pragma weak pthread_detach = _thr_detach
1094 #pragma weak _pthread_detach = _thr_detach
1095 int
1096 _thr_detach(thread_t tid)
1097 {
1098 	uberdata_t *udp = curthread->ul_uberdata;
1099 	ulwp_t *ulwp;
1100 	ulwp_t **ulwpp;
1101 	int error = 0;
1102 
1103 	if ((ulwpp = find_lwpp(tid)) == NULL)
1104 		return (ESRCH);
1105 	ulwp = *ulwpp;
1106 
1107 	if (ulwp->ul_dead) {
1108 		ulwp_unlock(ulwp, udp);
1109 		error = _thrp_join(tid, NULL, NULL, 0);
1110 	} else {
1111 		error = __lwp_detach(tid);
1112 		ulwp->ul_detached = 1;
1113 		ulwp->ul_usropts |= THR_DETACHED;
1114 		ulwp_unlock(ulwp, udp);
1115 	}
1116 	return (error);
1117 }
1118 
1119 /*
1120  * Static local string compare function to avoid calling strncmp()
1121  * (and hence the dynamic linker) during library initialization.
1122  */
1123 static int
1124 sncmp(const char *s1, const char *s2, size_t n)
1125 {
1126 	n++;
1127 	while (--n != 0 && *s1 == *s2++)
1128 		if (*s1++ == '\0')
1129 			return (0);
1130 	return (n == 0 ? 0 : *(uchar_t *)s1 - *(uchar_t *)--s2);
1131 }
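
/*
 * For example (illustrative), sncmp("_THREAD_STACK_CACHE=10", "_THREAD_", 8)
 * returns 0 because the first eight characters match, while
 * sncmp("LC_ALL=C", "_THREAD_", 8) returns the nonzero difference of the
 * first mismatching pair of characters.
 */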
1132 
1133 static const char *
1134 ematch(const char *ev, const char *match)
1135 {
1136 	int c;
1137 
1138 	while ((c = *match++) != '\0') {
1139 		if (*ev++ != c)
1140 			return (NULL);
1141 	}
1142 	if (*ev++ != '=')
1143 		return (NULL);
1144 	return (ev);
1145 }
1146 
1147 static int
1148 envvar(const char *ev, const char *match, int limit)
1149 {
1150 	int val = -1;
1151 	const char *ename;
1152 
1153 	if ((ename = ematch(ev, match)) != NULL) {
1154 		int c;
1155 		for (val = 0; (c = *ename) != '\0'; ename++) {
1156 			if (!isdigit(c)) {
1157 				val = -1;
1158 				break;
1159 			}
1160 			val = val * 10 + (c - '0');
1161 			if (val > limit) {
1162 				val = limit;
1163 				break;
1164 			}
1165 		}
1166 	}
1167 	return (val);
1168 }
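
/*
 * Illustrative behavior (assumed values): etest() passes the suffix of
 * the environment string, so for _THREAD_STACK_CACHE=50 it calls
 * envvar("STACK_CACHE=50", "STACK_CACHE", 10000), which returns 50.
 * A value larger than the limit is clamped to the limit; a non-digit
 * character in the value, or a non-matching name, yields -1.
 */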
1169 
1170 static void
1171 etest(const char *ev)
1172 {
1173 	int value;
1174 
1175 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1176 		thread_queue_spin = value;
1177 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0) {
1178 		thread_adaptive_spin = value;
1179 		thread_release_spin = (value + 1) / 2;
1180 	}
1181 	if ((value = envvar(ev, "RELEASE_SPIN", 1000000)) >= 0)
1182 		thread_release_spin = value;
1183 	if ((value = envvar(ev, "MAX_SPINNERS", 100)) >= 0)
1184 		thread_max_spinners = value;
1185 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1186 		thread_queue_fifo = value;
1187 #if defined(THREAD_DEBUG)
1188 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1189 		thread_queue_verify = value;
1190 #endif
1191 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1192 		thread_queue_dump = value;
1193 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1194 		thread_stack_cache = value;
1195 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1196 		thread_cond_wait_defer = value;
1197 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1198 		thread_error_detection = value;
1199 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1200 		thread_async_safe = value;
1201 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1202 		thread_door_noreserve = value;
1203 }
1204 
1205 /*
1206  * Look for and evaluate environment variables of the form "_THREAD_*".
1207  * For compatibility with the past, we also look for environment
1208  * names of the form "LIBTHREAD_*".
1209  */
1210 static void
1211 set_thread_vars()
1212 {
1213 	extern const char **_environ;
1214 	const char **pev;
1215 	const char *ev;
1216 	char c;
1217 
1218 	if ((pev = _environ) == NULL)
1219 		return;
1220 	while ((ev = *pev++) != NULL) {
1221 		c = *ev;
1222 		if (c == '_' && sncmp(ev, "_THREAD_", 8) == 0)
1223 			etest(ev + 8);
1224 		if (c == 'L' && sncmp(ev, "LIBTHREAD_", 10) == 0)
1225 			etest(ev + 10);
1226 	}
1227 }
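
/*
 * Example (illustrative): starting a process as
 *
 *	_THREAD_STACK_CACHE=40 _THREAD_QUEUE_DUMP=1 a.out
 *
 * makes set_thread_vars() set thread_stack_cache to 40 (later copied
 * into the uberdata by libc_init()) and thread_queue_dump to 1, which
 * enables the queue-statistics dump arranged for in finish_init().
 */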
1228 
1229 /* PROBE_SUPPORT begin */
1230 #pragma weak __tnf_probe_notify
1231 extern void __tnf_probe_notify(void);
1232 /* PROBE_SUPPORT end */
1233 
1234 /* same as atexit() but private to the library */
1235 extern int _atexit(void (*)(void));
1236 
1237 /* same as _cleanup() but private to the library */
1238 extern void __cleanup(void);
1239 
1240 extern void atfork_init(void);
1241 
1242 #ifdef __amd64
1243 extern void __amd64id(void);
1244 #endif
1245 
1246 /*
1247  * libc_init() is called by ld.so.1 for library initialization.
1248  * We perform minimal initialization; enough to work with the main thread.
1249  */
1250 void
1251 libc_init(void)
1252 {
1253 	uberdata_t *udp = &__uberdata;
1254 	ulwp_t *oldself = __curthread();
1255 	ucontext_t uc;
1256 	ulwp_t *self;
1257 	struct rlimit rl;
1258 	caddr_t data;
1259 	size_t tls_size;
1260 	int setmask;
1261 
1262 	/*
1263 	 * For the initial stage of initialization, we must be careful
1264 	 * not to call any function that could possibly call _cerror().
1265 	 * For this purpose, we call only the raw system call wrappers.
1266 	 */
1267 
1268 #ifdef __amd64
1269 	/*
1270 	 * Gather information about cache layouts for optimized
1271 	 * AMD assembler strfoo() and memfoo() functions.
1272 	 */
1273 	__amd64id();
1274 #endif
1275 
1276 	/*
1277 	 * Every libc, regardless of which link map, must register __cleanup().
1278 	 */
1279 	(void) _atexit(__cleanup);
1280 
1281 	/*
1282 	 * We keep our uberdata on one of (a) the first alternate link map
1283 	 * or (b) the primary link map.  We switch to the primary link map
1284 	 * and stay there once we see it.  All intermediate link maps are
1285 	 * subject to being unloaded at any time.
1286 	 */
1287 	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
1288 		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1289 		mutex_setup();
1290 		atfork_init();	/* every link map needs atfork() processing */
1291 		return;
1292 	}
1293 
1294 	/*
1295 	 * To establish the main stack information, we have to get our context.
1296 	 * This is also convenient to use for getting our signal mask.
1297 	 */
1298 	uc.uc_flags = UC_ALL;
1299 	(void) __getcontext_syscall(&uc);
1300 	ASSERT(uc.uc_link == NULL);
1301 
1302 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1303 	ASSERT(primary_link_map || tls_size == 0);
1304 	data = lmalloc(sizeof (ulwp_t) + tls_size);
1305 	if (data == NULL)
1306 		thr_panic("cannot allocate thread structure for main thread");
1307 	/* LINTED pointer cast may result in improper alignment */
1308 	self = (ulwp_t *)(data + tls_size);
1309 	init_hash_table[0].hash_bucket = self;
1310 
1311 	self->ul_sigmask = uc.uc_sigmask;
1312 	delete_reserved_signals(&self->ul_sigmask);
1313 	/*
1314 	 * Are the old and new sets different?
1315 	 * (This can happen if we are currently blocking SIGCANCEL.)
1316 	 * If so, we must explicitly set our signal mask, below.
1317 	 */
1318 	setmask =
1319 	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1320 	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]));
1321 
1322 #ifdef __sparc
1323 	/*
1324 	 * We cache several instructions in the thread structure for use
1325 	 * by the fasttrap DTrace provider. When changing this, read the
1326 	 * comment in fasttrap.h for all the other places that must
1327 	 * be changed.
1328 	 */
1329 	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1330 	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1331 	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1332 	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1333 #endif
1334 
1335 	self->ul_stktop =
1336 		(uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1337 	(void) _private_getrlimit(RLIMIT_STACK, &rl);
1338 	self->ul_stksiz = rl.rlim_cur;
1339 	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1340 
1341 	self->ul_forw = self->ul_back = self;
1342 	self->ul_hash = NULL;
1343 	self->ul_ix = 0;
1344 	self->ul_lwpid = 1; /* __lwp_self() */
1345 	self->ul_main = 1;
1346 	self->ul_self = self;
1347 	self->ul_uberdata = udp;
1348 	if (oldself != NULL) {
1349 		int i;
1350 
1351 		ASSERT(primary_link_map);
1352 		ASSERT(oldself->ul_main == 1);
1353 		self->ul_stsd = oldself->ul_stsd;
1354 		for (i = 0; i < TSD_NFAST; i++)
1355 			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1356 		self->ul_tls = oldself->ul_tls;
1357 		/*
1358 		 * Retrieve all pointers to uberdata allocated
1359 		 * while running on previous link maps.
1360 		 * This is a giant structure assignment.
1361 		 */
1362 		*udp = *oldself->ul_uberdata;
1363 		/*
1364 		 * These items point to global data on the primary link map.
1365 		 */
1366 		udp->thr_hash_table = init_hash_table;
1367 		udp->sigacthandler = sigacthandler;
1368 		udp->tdb.tdb_events = tdb_events;
1369 		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1370 		ASSERT(udp->lwp_stacks == NULL);
1371 		ASSERT(udp->ulwp_freelist == NULL);
1372 		ASSERT(udp->ulwp_replace_free == NULL);
1373 		ASSERT(udp->hash_size == 1);
1374 	}
1375 	udp->all_lwps = self;
1376 	udp->ulwp_one = self;
1377 	udp->pid = _private_getpid();
1378 	udp->nthreads = 1;
1379 	/*
1380 	 * In every link map, tdb_bootstrap points to the same piece of
1381 	 * allocated memory.  When the primary link map is initialized,
1382 	 * the allocated memory is assigned a pointer to the one true
1383 	 * uberdata.  This allows libc_db to initialize itself regardless
1384 	 * of which instance of libc it finds in the address space.
1385 	 */
1386 	if (udp->tdb_bootstrap == NULL)
1387 		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1388 	__tdb_bootstrap = udp->tdb_bootstrap;
1389 	if (primary_link_map) {
1390 		self->ul_primarymap = 1;
1391 		udp->primary_map = 1;
1392 		*udp->tdb_bootstrap = udp;
1393 	}
1394 	/*
1395 	 * Cancellation can't happen until:
1396 	 *	pthread_cancel() is called
1397 	 * or:
1398 	 *	another thread is created
1399 	 * For now, as a single-threaded process, set the flag that tells
1400 	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1401 	 */
1402 	self->ul_nocancel = 1;
1403 
1404 #if defined(__amd64)
1405 	self->ul_gs = ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1406 #elif defined(__i386)
1407 	self->ul_gs = ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1408 #endif	/* __i386 || __amd64 */
1409 	set_curthread(self);		/* redundant on i386 */
1410 	/*
1411 	 * Now curthread is established and it is safe to call any
1412 	 * function in libc except one that uses thread-local storage.
1413 	 */
1414 	self->ul_errnop = &errno;
1415 	if (oldself != NULL) {
1416 		/* tls_size was zero when oldself was allocated */
1417 		lfree(oldself, sizeof (ulwp_t));
1418 	}
1419 	mutex_setup();
1420 	atfork_init();
1421 	signal_init();
1422 
1423 	/*
1424 	 * If the stack is unlimited, we set the size to zero to disable
1425 	 * stack checking.
1426 	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1427 	 */
1428 	if (self->ul_stksiz == RLIM_INFINITY) {
1429 		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1430 		self->ul_ustack.ss_size = 0;
1431 	} else {
1432 		self->ul_ustack.ss_sp = self->ul_stk;
1433 		self->ul_ustack.ss_size = self->ul_stksiz;
1434 	}
1435 	self->ul_ustack.ss_flags = 0;
1436 	(void) _private_setustack(&self->ul_ustack);
1437 
1438 	/*
1439 	 * Get the variables that affect thread behavior from the environment.
1440 	 */
1441 	set_thread_vars();
1442 	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1443 	udp->thread_stack_cache = thread_stack_cache;
1444 
1445 	/*
1446 	 * Make per-thread copies of global variables, for speed.
1447 	 */
1448 	self->ul_queue_fifo = (char)thread_queue_fifo;
1449 	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1450 	self->ul_error_detection = (char)thread_error_detection;
1451 	self->ul_async_safe = (char)thread_async_safe;
1452 	self->ul_door_noreserve = (char)thread_door_noreserve;
1453 	self->ul_max_spinners = (uchar_t)thread_max_spinners;
1454 	self->ul_adaptive_spin = thread_adaptive_spin;
1455 	self->ul_release_spin = thread_release_spin;
1456 	self->ul_queue_spin = thread_queue_spin;
1457 
1458 	/*
1459 	 * When we have initialized the primary link map, inform
1460 	 * the dynamic linker about our interface functions.
1461 	 */
1462 	if (self->ul_primarymap)
1463 		_ld_libc((void *)rtld_funcs);
1464 
1465 	/*
1466 	 * Defer signals until TLS constructors have been called.
1467 	 */
1468 	sigoff(self);
1469 	tls_setup();
1470 	sigon(self);
1471 	if (setmask)
1472 		(void) restore_signals(self);
1473 
1474 	/* PROBE_SUPPORT begin */
1475 	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1476 		__tnf_probe_notify();
1477 	/* PROBE_SUPPORT end */
1478 }
1479 
1480 #pragma fini(libc_fini)
1481 void
1482 libc_fini()
1483 {
1484 	/*
1485 	 * If we are doing fini processing for the instance of libc
1486 	 * on the first alternate link map (this happens only when
1487 	 * the dynamic linker rejects a bad audit library), then clear
1488 	 * __curthread().  We abandon whatever memory was allocated by
1489 	 * lmalloc() while running on this alternate link-map but we
1490 	 * don't care (and can't find the memory in any case); we just
1491 	 * want to protect the application from this bad audit library.
1492 	 * No fini processing is done by libc in the normal case.
1493 	 */
1494 
1495 	uberdata_t *udp = curthread->ul_uberdata;
1496 
1497 	if (udp->primary_map == 0 && udp == &__uberdata)
1498 		set_curthread(NULL);
1499 }
1500 
1501 /*
1502  * finish_init is called when we are about to become multi-threaded,
1503  * that is, on the first call to thr_create().
1504  */
1505 void
1506 finish_init()
1507 {
1508 	ulwp_t *self = curthread;
1509 	uberdata_t *udp = self->ul_uberdata;
1510 	thr_hash_table_t *htp;
1511 	void *data;
1512 	int i;
1513 
1514 	/*
1515 	 * No locks needed here; we are single-threaded on the first call.
1516 	 * We can be called only after the primary link map has been set up.
1517 	 */
1518 	ASSERT(self->ul_primarymap);
1519 	ASSERT(self == udp->ulwp_one);
1520 	ASSERT(!udp->uberflags.uf_mt);
1521 	ASSERT(udp->hash_size == 1);
1522 
1523 	/*
1524 	 * First allocate the queue_head array if not already allocated.
1525 	 */
1526 	if (udp->queue_head == NULL)
1527 		queue_alloc();
1528 
1529 	/*
1530 	 * Now allocate the thread hash table.
1531 	 */
1532 	if ((data = _private_mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1533 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1534 	    == MAP_FAILED)
1535 		thr_panic("cannot allocate thread hash table");
1536 
1537 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1538 	udp->hash_size = HASHTBLSZ;
1539 	udp->hash_mask = HASHTBLSZ - 1;
1540 
1541 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1542 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1543 		htp->hash_cond.cond_magic = COND_MAGIC;
1544 	}
1545 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1546 
1547 	/*
1548 	 * Set up the SIGCANCEL handler for threads cancellation.
1549 	 */
1550 	init_sigcancel();
1551 
1552 	/*
1553 	 * Arrange to do special things on exit --
1554 	 * - collect queue statistics from all remaining active threads.
1555 	 * - grab assert_lock to ensure that assertion failures
1556 	 *   and a core dump take precedence over _exit().
1557 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1558 	 * (Functions are called in the reverse order of their registration.)
1559 	 */
1560 	(void) _atexit(dump_queue_statistics);
1561 	(void) _atexit(grab_assert_lock);
1562 	(void) _atexit(collect_queue_statistics);
1563 }
1564 
1565 /*
1566  * Used only by _postfork1_child(), below.
1567  */
1568 static void
1569 mark_dead_and_buried(ulwp_t *ulwp)
1570 {
1571 	ulwp->ul_dead = 1;
1572 	ulwp->ul_lwpid = (lwpid_t)(-1);
1573 	ulwp->ul_hash = NULL;
1574 	ulwp->ul_ix = -1;
1575 	ulwp->ul_schedctl = NULL;
1576 	ulwp->ul_schedctl_called = NULL;
1577 }
1578 
1579 /*
1580  * This is called from fork1() in the child.
1581  * Reset our data structures to reflect one lwp.
1582  */
1583 void
1584 _postfork1_child()
1585 {
1586 	ulwp_t *self = curthread;
1587 	uberdata_t *udp = self->ul_uberdata;
1588 	ulwp_t *next;
1589 	ulwp_t *ulwp;
1590 	int i;
1591 
1592 	/* daemon threads shouldn't call fork1(), but oh well... */
1593 	self->ul_usropts &= ~THR_DAEMON;
1594 	udp->nthreads = 1;
1595 	udp->ndaemons = 0;
1596 	udp->uberflags.uf_mt = 0;
1597 	__threaded = 0;
1598 	for (i = 0; i < udp->hash_size; i++)
1599 		udp->thr_hash_table[i].hash_bucket = NULL;
1600 	self->ul_lwpid = __lwp_self();
1601 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1602 
1603 	/* no one in the child is on a sleep queue; reinitialize */
1604 	if (udp->queue_head) {
1605 		(void) _private_memset(udp->queue_head, 0,
1606 			2 * QHASHSIZE * sizeof (queue_head_t));
1607 		for (i = 0; i < 2 * QHASHSIZE; i++)
1608 			udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC;
1609 	}
1610 
1611 	/*
1612 	 * All lwps except ourself are gone.  Mark them so.
1613 	 * First mark all of the lwps that have already been freed.
1614 	 * Then mark and free all of the active lwps except ourself.
1615 	 * Since we are single-threaded, no locks are required here.
1616 	 */
1617 	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1618 		mark_dead_and_buried(ulwp);
1619 	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1620 		mark_dead_and_buried(ulwp);
1621 	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1622 		next = ulwp->ul_forw;
1623 		ulwp->ul_forw = ulwp->ul_back = NULL;
1624 		mark_dead_and_buried(ulwp);
1625 		tsd_free(ulwp);
1626 		tls_free(ulwp);
1627 		rwl_free(ulwp);
1628 		ulwp_free(ulwp);
1629 	}
1630 	self->ul_forw = self->ul_back = udp->all_lwps = self;
1631 	if (self != udp->ulwp_one)
1632 		mark_dead_and_buried(udp->ulwp_one);
1633 	if ((ulwp = udp->all_zombies) != NULL) {
1634 		ASSERT(udp->nzombies != 0);
1635 		do {
1636 			next = ulwp->ul_forw;
1637 			ulwp->ul_forw = ulwp->ul_back = NULL;
1638 			mark_dead_and_buried(ulwp);
1639 			udp->nzombies--;
1640 			if (ulwp->ul_replace) {
1641 				ulwp->ul_next = NULL;
1642 				if (udp->ulwp_replace_free == NULL) {
1643 					udp->ulwp_replace_free =
1644 						udp->ulwp_replace_last = ulwp;
1645 				} else {
1646 					udp->ulwp_replace_last->ul_next = ulwp;
1647 					udp->ulwp_replace_last = ulwp;
1648 				}
1649 			}
1650 		} while ((ulwp = next) != udp->all_zombies);
1651 		ASSERT(udp->nzombies == 0);
1652 		udp->all_zombies = NULL;
1653 		udp->nzombies = 0;
1654 	}
1655 	trim_stack_cache(0);
1656 }
1657 
1658 #pragma weak thr_setprio = _thr_setprio
1659 #pragma weak pthread_setschedprio = _thr_setprio
1660 #pragma weak _pthread_setschedprio = _thr_setprio
1661 int
1662 _thr_setprio(thread_t tid, int priority)
1663 {
1664 	struct sched_param param;
1665 
1666 	(void) _memset(&param, 0, sizeof (param));
1667 	param.sched_priority = priority;
1668 	return (_thread_setschedparam_main(tid, 0, &param, PRIO_SET_PRIO));
1669 }
1670 
1671 #pragma weak thr_getprio = _thr_getprio
1672 int
1673 _thr_getprio(thread_t tid, int *priority)
1674 {
1675 	uberdata_t *udp = curthread->ul_uberdata;
1676 	ulwp_t *ulwp;
1677 	int error = 0;
1678 
1679 	if ((ulwp = find_lwp(tid)) == NULL)
1680 		error = ESRCH;
1681 	else {
1682 		*priority = ulwp->ul_pri;
1683 		ulwp_unlock(ulwp, udp);
1684 	}
1685 	return (error);
1686 }
1687 
1688 lwpid_t
1689 lwp_self(void)
1690 {
1691 	return (curthread->ul_lwpid);
1692 }
1693 
1694 #pragma weak _ti_thr_self = _thr_self
1695 #pragma weak thr_self = _thr_self
1696 #pragma weak pthread_self = _thr_self
1697 #pragma weak _pthread_self = _thr_self
1698 thread_t
1699 _thr_self()
1700 {
1701 	return (curthread->ul_lwpid);
1702 }
1703 
1704 #pragma weak thr_main = _thr_main
1705 int
1706 _thr_main()
1707 {
1708 	ulwp_t *self = __curthread();
1709 
1710 	return ((self == NULL)? -1 : self->ul_main);
1711 }
1712 
1713 int
1714 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1715 {
1716 	stk->ss_sp = (void *)ulwp->ul_stktop;
1717 	stk->ss_size = ulwp->ul_stksiz;
1718 	stk->ss_flags = 0;
1719 	return (0);
1720 }
1721 
1722 #pragma weak thr_stksegment = _thr_stksegment
1723 int
1724 _thr_stksegment(stack_t *stk)
1725 {
1726 	return (_thrp_stksegment(curthread, stk));
1727 }
1728 
1729 void
1730 force_continue(ulwp_t *ulwp)
1731 {
1732 #if defined(THREAD_DEBUG)
1733 	ulwp_t *self = curthread;
1734 	uberdata_t *udp = self->ul_uberdata;
1735 #endif
1736 	int error;
1737 	timespec_t ts;
1738 
1739 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1740 
1741 	for (;;) {
1742 		error = __lwp_continue(ulwp->ul_lwpid);
1743 		if (error != 0 && error != EINTR)
1744 			break;
1745 		error = 0;
1746 		if (ulwp->ul_stopping) {	/* he is stopping himself */
1747 			ts.tv_sec = 0;		/* give him a chance to run */
1748 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1749 			(void) ___nanosleep(&ts, NULL);
1750 		}
1751 		if (!ulwp->ul_stopping)		/* he is running now */
1752 			break;			/* so we are done */
1753 		/*
1754 		 * He is marked as being in the process of stopping
1755 		 * himself.  Loop around and continue him again.
1756 		 * He may not have been stopped the first time.
1757 		 */
1758 	}
1759 }
1760 
1761 /*
1762  * Suspend an lwp with lwp_suspend(), then move it to a safe
1763  * point, that is, to a point where ul_critical is zero.
1764  * On return, the ulwp_lock() is dropped as with ulwp_unlock().
1765  * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
1766  * If we have to drop link_lock, we store 1 through link_dropped.
1767  * If the lwp exits before it can be suspended, we return ESRCH.
1768  */
1769 int
1770 safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
1771 {
1772 	ulwp_t *self = curthread;
1773 	uberdata_t *udp = self->ul_uberdata;
1774 	cond_t *cvp = ulwp_condvar(ulwp, udp);
1775 	mutex_t *mp = ulwp_mutex(ulwp, udp);
1776 	thread_t tid = ulwp->ul_lwpid;
1777 	int ix = ulwp->ul_ix;
1778 	int error = 0;
1779 
1780 	ASSERT(whystopped == TSTP_REGULAR ||
1781 	    whystopped == TSTP_MUTATOR ||
1782 	    whystopped == TSTP_FORK);
1783 	ASSERT(ulwp != self);
1784 	ASSERT(!ulwp->ul_stop);
1785 	ASSERT(MUTEX_OWNED(mp, self));
1786 
1787 	if (link_dropped != NULL)
1788 		*link_dropped = 0;
1789 
1790 	/*
1791 	 * We must grab the target's spin lock before suspending it.
1792 	 * See the comments below and in _thrp_suspend() for why.
1793 	 */
1794 	spin_lock_set(&ulwp->ul_spinlock);
1795 	(void) ___lwp_suspend(tid);
1796 	spin_lock_clear(&ulwp->ul_spinlock);
1797 
1798 top:
1799 	if (ulwp->ul_critical == 0 || ulwp->ul_stopping) {
1800 		/* thread is already safe */
1801 		ulwp->ul_stop |= whystopped;
1802 	} else {
1803 		/*
1804 		 * Setting ul_pleasestop causes the target thread to stop
1805 		 * itself in _thrp_suspend(), below, after we drop its lock.
1806 		 * We must continue the critical thread before dropping
1807 		 * link_lock because the critical thread may be holding
1808 		 * the queue lock for link_lock.  This is delicate.
1809 		 */
1810 		ulwp->ul_pleasestop |= whystopped;
1811 		force_continue(ulwp);
1812 		if (link_dropped != NULL) {
1813 			*link_dropped = 1;
1814 			lmutex_unlock(&udp->link_lock);
1815 			/* be sure to drop link_lock only once */
1816 			link_dropped = NULL;
1817 		}
1818 
1819 		/*
1820 		 * The thread may disappear by calling thr_exit() so we
1821 		 * cannot rely on the ulwp pointer after dropping the lock.
1822 		 * Instead, we search the hash table to find it again.
1823 		 * When we return, we may find that the thread has been
1824 		 * continued by some other thread.  The suspend/continue
1825 		 * interfaces are prone to such race conditions by design.
1826 		 */
1827 		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
1828 		    (ulwp->ul_pleasestop & whystopped)) {
1829 			(void) _cond_wait(cvp, mp);
1830 			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
1831 			    ulwp != NULL; ulwp = ulwp->ul_hash) {
1832 				if (ulwp->ul_lwpid == tid)
1833 					break;
1834 			}
1835 		}
1836 
1837 		if (ulwp == NULL || ulwp->ul_dead)
1838 			error = ESRCH;
1839 		else {
1840 			/*
1841 			 * Do another lwp_suspend() to make sure we don't
1842 			 * return until the target thread is fully stopped
1843 			 * in the kernel.  Don't apply lwp_suspend() until
1844 			 * we know that the target is not holding any
1845 			 * queue locks, that is, that it has completed
1846 			 * ulwp_unlock(self) and has, or at least is
1847 			 * about to, call lwp_suspend() on itself.  We do
1848 			 * this by grabbing the target's spin lock.
1849 			 */
1850 			ASSERT(ulwp->ul_lwpid == tid);
1851 			spin_lock_set(&ulwp->ul_spinlock);
1852 			(void) ___lwp_suspend(tid);
1853 			spin_lock_clear(&ulwp->ul_spinlock);
1854 			/*
1855 			 * If some other thread did a thr_continue()
1856 			 * on the target thread we have to start over.
1857 			 */
1858 			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
1859 				goto top;
1860 		}
1861 	}
1862 
1863 	(void) cond_broadcast_internal(cvp);
1864 	lmutex_unlock(mp);
1865 	return (error);
1866 }
1867 
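/*
 * Caller-side sketch of the 'link_dropped' protocol described above.
 * This is not additional library code; it merely restates the pattern
 * that suspend_fork() and _thr_suspend_allmutators() follow below:
 *
 *	int link_dropped;
 * top:
 *	lmutex_lock(&udp->link_lock);
 *	...
 *	ulwp_lock(ulwp, udp);
 *	if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) || link_dropped)
 *		goto top;	(link_lock was dropped; rescan the list)
 *	...
 *	lmutex_unlock(&udp->link_lock);
 */
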
1868 int
1869 _thrp_suspend(thread_t tid, uchar_t whystopped)
1870 {
1871 	ulwp_t *self = curthread;
1872 	uberdata_t *udp = self->ul_uberdata;
1873 	ulwp_t *ulwp;
1874 	int error = 0;
1875 
1876 	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
1877 	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);
1878 
1879 	/*
1880 	 * We can't suspend anyone except ourself while a fork is happening.
1881 	 * This also has the effect of allowing only one suspension at a time.
1882 	 */
1883 	if (tid != self->ul_lwpid)
1884 		(void) fork_lock_enter(NULL);
1885 
1886 	if ((ulwp = find_lwp(tid)) == NULL)
1887 		error = ESRCH;
1888 	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
1889 		ulwp_unlock(ulwp, udp);
1890 		error = EINVAL;
1891 	} else if (ulwp->ul_stop) {	/* already stopped */
1892 		ulwp->ul_stop |= whystopped;
1893 		ulwp_broadcast(ulwp);
1894 		ulwp_unlock(ulwp, udp);
1895 	} else if (ulwp != self) {
1896 		/*
1897 		 * After suspending the other thread, move it out of a
1898 		 * critical section and deal with the schedctl mappings.
1899 		 * safe_suspend() suspends the other thread, calls
1900 		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
1901 		 */
1902 		error = safe_suspend(ulwp, whystopped, NULL);
1903 	} else {
1904 		int schedctl_after_fork = 0;
1905 
1906 		/*
1907 		 * We are suspending ourself.  We must not take a signal
1908 		 * until we return from lwp_suspend() and clear ul_stopping.
1909 		 * This is to guard against siglongjmp().
1910 		 */
1911 		enter_critical(self);
1912 		self->ul_sp = stkptr();
1913 		_flush_windows();	/* sparc */
1914 		self->ul_pleasestop = 0;
1915 		self->ul_stop |= whystopped;
1916 		/*
1917 		 * Grab our spin lock before dropping ulwp_mutex(self).
1918 		 * This prevents the suspending thread from applying
1919 		 * lwp_suspend() to us before we emerge from
1920 		 * lmutex_unlock(mp) and have dropped mp's queue lock.
1921 		 */
1922 		spin_lock_set(&self->ul_spinlock);
1923 		self->ul_stopping = 1;
1924 		ulwp_broadcast(self);
1925 		ulwp_unlock(self, udp);
1926 		/*
1927 		 * From this point until we return from lwp_suspend(),
1928 		 * we must not call any function that might invoke the
1929 		 * dynamic linker, that is, we can only call functions
1930 		 * private to the library.
1931 		 *
1932 		 * Also, this is a nasty race condition for a process
1933 		 * that is undergoing a forkall() operation:
1934 		 * Once we clear our spinlock (below), we are vulnerable
1935 		 * to being suspended by the forkall() thread before
1936 		 * we manage to suspend ourself in ___lwp_suspend().
1937 		 * See safe_suspend() and force_continue().
1938 		 *
1939 		 * To avoid a SIGSEGV due to the disappearance
1940 		 * of the schedctl mappings in the child process,
1941 		 * which can happen in spin_lock_clear() if we
1942 		 * are suspended while we are in the middle of
1943 		 * its call to preempt(), we preemptively clear
1944 		 * our own schedctl pointer before dropping our
1945 		 * spinlock.  We reinstate it, in both the parent
1946 		 * and (if this really is a forkall()) the child.
1947 		 */
1948 		if (whystopped & TSTP_FORK) {
1949 			schedctl_after_fork = 1;
1950 			self->ul_schedctl = NULL;
1951 			self->ul_schedctl_called = &udp->uberflags;
1952 		}
1953 		spin_lock_clear(&self->ul_spinlock);
1954 		(void) ___lwp_suspend(tid);
1955 		/*
1956 		 * Somebody else continued us.
1957 		 * We can't grab ulwp_lock(self)
1958 		 * until after clearing ul_stopping.
1959 		 * force_continue() relies on this.
1960 		 */
1961 		self->ul_stopping = 0;
1962 		self->ul_sp = 0;
1963 		if (schedctl_after_fork) {
1964 			self->ul_schedctl_called = NULL;
1965 			self->ul_schedctl = NULL;
1966 			(void) setup_schedctl();
1967 		}
1968 		ulwp_lock(self, udp);
1969 		ulwp_broadcast(self);
1970 		ulwp_unlock(self, udp);
1971 		exit_critical(self);
1972 	}
1973 
1974 	if (tid != self->ul_lwpid)
1975 		fork_lock_exit();
1976 
1977 	return (error);
1978 }
1979 
1980 /*
1981  * Suspend all lwps other than ourself in preparation for fork.
1982  */
1983 void
1984 suspend_fork()
1985 {
1986 	ulwp_t *self = curthread;
1987 	uberdata_t *udp = self->ul_uberdata;
1988 	ulwp_t *ulwp;
1989 	int link_dropped;
1990 
1991 top:
1992 	lmutex_lock(&udp->link_lock);
1993 
1994 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
1995 		ulwp_lock(ulwp, udp);
1996 		if (ulwp->ul_stop) {	/* already stopped */
1997 			ulwp->ul_stop |= TSTP_FORK;
1998 			ulwp_broadcast(ulwp);
1999 			ulwp_unlock(ulwp, udp);
2000 		} else {
2001 			/*
2002 			 * Suspend the lwp and move it to a safe point.
2003 			 */
2004 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2005 			    link_dropped)
2006 				goto top;
2007 		}
2008 	}
2009 
2010 	lmutex_unlock(&udp->link_lock);
2011 }
2012 
2013 void
2014 continue_fork(int child)
2015 {
2016 	ulwp_t *self = curthread;
2017 	uberdata_t *udp = self->ul_uberdata;
2018 	ulwp_t *ulwp;
2019 
2020 	/*
2021 	 * Clear the schedctl pointers in the child of forkall().
2022 	 */
2023 	if (child) {
2024 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2025 			ulwp->ul_schedctl_called =
2026 			    ulwp->ul_dead ? &udp->uberflags : NULL;
2027 			ulwp->ul_schedctl = NULL;
2028 		}
2029 	}
2030 
2031 	/*
2032 	 * Set all lwps that were stopped for fork() running again.
2033 	 */
2034 	lmutex_lock(&udp->link_lock);
2035 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2036 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2037 		lmutex_lock(mp);
2038 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2039 		ulwp->ul_stop &= ~TSTP_FORK;
2040 		ulwp_broadcast(ulwp);
2041 		if (!ulwp->ul_stop)
2042 			force_continue(ulwp);
2043 		lmutex_unlock(mp);
2044 	}
2045 	lmutex_unlock(&udp->link_lock);
2046 }
2047 
2048 int
2049 _thrp_continue(thread_t tid, uchar_t whystopped)
2050 {
2051 	uberdata_t *udp = curthread->ul_uberdata;
2052 	ulwp_t *ulwp;
2053 	mutex_t *mp;
2054 	int error = 0;
2055 
2056 	ASSERT(whystopped == TSTP_REGULAR ||
2057 	    whystopped == TSTP_MUTATOR);
2058 
2059 	if ((ulwp = find_lwp(tid)) == NULL)
2060 		return (ESRCH);
2061 
2062 	mp = ulwp_mutex(ulwp, udp);
2063 	if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
2064 		error = EINVAL;
2065 	} else if (ulwp->ul_stop & whystopped) {
2066 		ulwp->ul_stop &= ~whystopped;
2067 		ulwp_broadcast(ulwp);
2068 		if (!ulwp->ul_stop) {
2069 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2070 				ulwp->ul_sp = 0;
2071 				ulwp->ul_created = 0;
2072 			}
2073 			force_continue(ulwp);
2074 		}
2075 	}
2076 
2077 	lmutex_unlock(mp);
2078 	return (error);
2079 }
2080 
2081 #pragma weak thr_suspend = _thr_suspend
2082 int
2083 _thr_suspend(thread_t tid)
2084 {
2085 	return (_thrp_suspend(tid, TSTP_REGULAR));
2086 }
2087 
2088 #pragma weak thr_continue = _thr_continue
2089 int
2090 _thr_continue(thread_t tid)
2091 {
2092 	return (_thrp_continue(tid, TSTP_REGULAR));
2093 }
2094 
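/*
 * Illustrative application-level sketch (not part of the library) of
 * thr_suspend()/thr_continue().  The worker function and the state
 * examination are hypothetical.  As noted in safe_suspend() above,
 * the suspend/continue interfaces are prone to races by design, so a
 * thread resumed here may be suspended again by some other thread.
 *
 *	thread_t victim;
 *	int error;
 *
 *	(void) thr_create(NULL, 0, worker, NULL, 0, &victim);
 *	...
 *	if ((error = thr_suspend(victim)) == 0) {
 *		examine_state_of(victim);	(hypothetical)
 *		(void) thr_continue(victim);
 *	} else if (error == ESRCH) {
 *		... the thread has already exited ...
 *	}
 */
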
2095 #pragma weak thr_yield = _thr_yield
2096 void
2097 _thr_yield()
2098 {
2099 	lwp_yield();
2100 }
2101 
2102 #pragma weak thr_kill = _thr_kill
2103 #pragma weak pthread_kill = _thr_kill
2104 #pragma weak _pthread_kill = _thr_kill
2105 int
2106 _thr_kill(thread_t tid, int sig)
2107 {
2108 	if (sig == SIGCANCEL)
2109 		return (EINVAL);
2110 	return (__lwp_kill(tid, sig));
2111 }
2112 
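/*
 * Illustrative sketch (not library code) of directing a signal at one
 * particular thread with thr_kill(); my_handler and tid are assumed
 * to exist in the application.
 *
 *	struct sigaction act;
 *
 *	act.sa_handler = my_handler;
 *	(void) sigemptyset(&act.sa_mask);
 *	act.sa_flags = 0;
 *	(void) sigaction(SIGUSR1, &act, NULL);
 *	if (thr_kill(tid, SIGUSR1) == ESRCH)
 *		... the target thread no longer exists ...
 *
 * SIGCANCEL is rejected with EINVAL above because it is reserved for
 * the implementation of thread cancellation.
 */
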
2113 /*
2114  * Exit a critical section, take deferred actions if necessary.
2115  */
2116 void
2117 do_exit_critical()
2118 {
2119 	ulwp_t *self = curthread;
2120 	int sig;
2121 
2122 	ASSERT(self->ul_critical == 0);
2123 	if (self->ul_dead)
2124 		return;
2125 
2126 	while (self->ul_pleasestop ||
2127 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2128 		/*
2129 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2130 		 * by keeping self->ul_critical == 1 here.
2131 		 */
2132 		self->ul_critical++;
2133 		while (self->ul_pleasestop) {
2134 			/*
2135 			 * Guard against suspending ourself while on a sleep
2136 			 * queue.  See the comments in call_user_handler().
2137 			 */
2138 			unsleep_self();
2139 			set_parking_flag(self, 0);
2140 			(void) _thrp_suspend(self->ul_lwpid,
2141 			    self->ul_pleasestop);
2142 		}
2143 		self->ul_critical--;
2144 
2145 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2146 			/*
2147 			 * Clear ul_cursig before proceeding.
2148 			 * This protects us from the dynamic linker's
2149 			 * calls to bind_guard()/bind_clear() in the
2150 			 * event that it is invoked to resolve a symbol
2151 			 * like take_deferred_signal() below.
2152 			 */
2153 			self->ul_cursig = 0;
2154 			take_deferred_signal(sig);
2155 			ASSERT(self->ul_cursig == 0);
2156 		}
2157 	}
2158 	ASSERT(self->ul_critical == 0);
2159 }
2160 
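/*
 * _ti_bind_guard() and _ti_bind_clear() are called by the runtime
 * linker, ld.so.1, to bracket its symbol-binding operations.  Entering
 * a critical section here keeps the binding thread from being
 * suspended or from taking a deferred signal while the runtime linker
 * holds its own internal locks.
 */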
2161 int
2162 _ti_bind_guard(int bindflag)
2163 {
2164 	ulwp_t *self = curthread;
2165 
2166 	if ((self->ul_bindflags & bindflag) == bindflag)
2167 		return (0);
2168 	enter_critical(self);
2169 	self->ul_bindflags |= bindflag;
2170 	return (1);
2171 }
2172 
2173 int
2174 _ti_bind_clear(int bindflag)
2175 {
2176 	ulwp_t *self = curthread;
2177 
2178 	if ((self->ul_bindflags & bindflag) == 0)
2179 		return (self->ul_bindflags);
2180 	self->ul_bindflags &= ~bindflag;
2181 	exit_critical(self);
2182 	return (self->ul_bindflags);
2183 }
2184 
2185 /*
2186  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2187  * it does in the old libthread (see the comments in cond_wait_queue()).
2188  * Also, signals are deferred at thread startup until TLS constructors
2189  * have all been called, at which time _thr_setup() calls sigon().
2190  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2191  * (librtc.so) to defer signals during its critical sections (not to be
2192  * confused with libc critical sections [see exit_critical() above]).
2193  */
2194 void
2195 _sigoff(void)
2196 {
2197 	sigoff(curthread);
2198 }
2199 
2200 void
2201 _sigon(void)
2202 {
2203 	sigon(curthread);
2204 }
2205 
2206 void
2207 sigon(ulwp_t *self)
2208 {
2209 	int sig;
2210 
2211 	ASSERT(self->ul_sigdefer > 0);
2212 	if (--self->ul_sigdefer == 0) {
2213 		if ((sig = self->ul_cursig) != 0 && self->ul_critical == 0) {
2214 			self->ul_cursig = 0;
2215 			take_deferred_signal(sig);
2216 			ASSERT(self->ul_cursig == 0);
2217 		}
2218 	}
2219 }
2220 
2221 #pragma weak thr_getconcurrency = _thr_getconcurrency
2222 int
2223 _thr_getconcurrency()
2224 {
2225 	return (thr_concurrency);
2226 }
2227 
2228 #pragma weak pthread_getconcurrency = _pthread_getconcurrency
2229 int
2230 _pthread_getconcurrency()
2231 {
2232 	return (pthread_concurrency);
2233 }
2234 
2235 #pragma weak thr_setconcurrency = _thr_setconcurrency
2236 int
2237 _thr_setconcurrency(int new_level)
2238 {
2239 	uberdata_t *udp = curthread->ul_uberdata;
2240 
2241 	if (new_level < 0)
2242 		return (EINVAL);
2243 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2244 		return (EAGAIN);
2245 	lmutex_lock(&udp->link_lock);
2246 	if (new_level > thr_concurrency)
2247 		thr_concurrency = new_level;
2248 	lmutex_unlock(&udp->link_lock);
2249 	return (0);
2250 }
2251 
2252 #pragma weak pthread_setconcurrency = _pthread_setconcurrency
2253 int
2254 _pthread_setconcurrency(int new_level)
2255 {
2256 	if (new_level < 0)
2257 		return (EINVAL);
2258 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2259 		return (EAGAIN);
2260 	pthread_concurrency = new_level;
2261 	return (0);
2262 }
2263 
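/*
 * Illustrative sketch (not library code) of the concurrency hints:
 *
 *	int old = thr_getconcurrency();
 *	if (thr_setconcurrency(old + 4) == 0)
 *		assert(thr_getconcurrency() >= old + 4);
 */
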
2264 #pragma weak thr_min_stack = _thr_min_stack
2265 #pragma weak __pthread_min_stack = _thr_min_stack
2266 size_t
2267 _thr_min_stack(void)
2268 {
2269 	return (MINSTACK);
2270 }
2271 
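/*
 * Illustrative sketch (not library code): thr_min_stack() is normally
 * used to size a caller-supplied stack, adding room for the
 * application's own frames on top of the library minimum.  The worker
 * function and the 32K pad are hypothetical.
 *
 *	size_t size = thr_min_stack() + 32 * 1024;
 *	void *stk = malloc(size);		(error checking omitted)
 *	thread_t tid;
 *	int error = thr_create(stk, size, worker, NULL, THR_BOUND, &tid);
 */
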
2272 int
2273 __nthreads(void)
2274 {
2275 	return (curthread->ul_uberdata->nthreads);
2276 }
2277 
2278 /*
2279  * XXX
2280  * The remainder of this file implements the private interfaces to java for
2281  * garbage collection.  It is no longer used, at least by java 1.2.
2282  * It can all go away once all old JVMs have disappeared.
2283  */
2284 
2285 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2286 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2287 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2288 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2289 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2290 
2291 /*
2292  * Get the available register state for the target thread.
2293  * Return non-volatile registers: TRS_NONVOLATILE
2294  */
2295 #pragma weak thr_getstate = _thr_getstate
2296 int
2297 _thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2298 {
2299 	ulwp_t *self = curthread;
2300 	uberdata_t *udp = self->ul_uberdata;
2301 	ulwp_t **ulwpp;
2302 	ulwp_t *ulwp;
2303 	int error = 0;
2304 	int trs_flag = TRS_LWPID;
2305 
2306 	if (tid == 0 || self->ul_lwpid == tid) {
2307 		ulwp = self;
2308 		ulwp_lock(ulwp, udp);
2309 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2310 		ulwp = *ulwpp;
2311 	} else {
2312 		if (flag)
2313 			*flag = TRS_INVALID;
2314 		return (ESRCH);
2315 	}
2316 
2317 	if (ulwp->ul_dead) {
2318 		trs_flag = TRS_INVALID;
2319 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2320 		error = EINVAL;
2321 		trs_flag = TRS_INVALID;
2322 	} else if (ulwp->ul_stop) {
2323 		trs_flag = TRS_NONVOLATILE;
2324 		getgregs(ulwp, rs);
2325 	}
2326 
2327 	if (flag)
2328 		*flag = trs_flag;
2329 	if (lwp)
2330 		*lwp = tid;
2331 	if (ss != NULL)
2332 		(void) _thrp_stksegment(ulwp, ss);
2333 
2334 	ulwp_unlock(ulwp, udp);
2335 	return (error);
2336 }
2337 
2338 /*
2339  * Set the appropriate register state for the target thread.
2340  * This is not used by java.  It exists solely for the MSTC test suite.
2341  */
2342 #pragma weak thr_setstate = _thr_setstate
2343 int
2344 _thr_setstate(thread_t tid, int flag, gregset_t rs)
2345 {
2346 	uberdata_t *udp = curthread->ul_uberdata;
2347 	ulwp_t *ulwp;
2348 	int error = 0;
2349 
2350 	if ((ulwp = find_lwp(tid)) == NULL)
2351 		return (ESRCH);
2352 
2353 	if (!ulwp->ul_stop && !suspendedallmutators)
2354 		error = EINVAL;
2355 	else if (rs != NULL) {
2356 		switch (flag) {
2357 		case TRS_NONVOLATILE:
2358 			/* do /proc stuff here? */
2359 			if (ulwp->ul_stop)
2360 				setgregs(ulwp, rs);
2361 			else
2362 				error = EINVAL;
2363 			break;
2364 		case TRS_LWPID:		/* do /proc stuff here? */
2365 		default:
2366 			error = EINVAL;
2367 			break;
2368 		}
2369 	}
2370 
2371 	ulwp_unlock(ulwp, udp);
2372 	return (error);
2373 }
2374 
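/*
 * The next two functions do raw /proc(4) control of an lwp in the
 * current process, without using stdio or libproc: getlwpstatus()
 * polls the lwp's lwpstatus file until the lwp is stopped, and
 * putlwpregs() writes a PCDSTOP/PCSREG/PCRUN sequence to the lwp's
 * lwpctl file to set its registers and set it running again.
 */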
2375 int
2376 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2377 {
2378 	extern ssize_t _pread(int, void *, size_t, off_t);
2379 	char buf[100];
2380 	int fd;
2381 
2382 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2383 	(void) strcpy(buf, "/proc/self/lwp/");
2384 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2385 	(void) strcat(buf, "/lwpstatus");
2386 	if ((fd = _open(buf, O_RDONLY, 0)) >= 0) {
2387 		while (_pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2388 			if (sp->pr_flags & PR_STOPPED) {
2389 				(void) _close(fd);
2390 				return (0);
2391 			}
2392 			lwp_yield();	/* give it a chance to stop */
2393 		}
2394 		(void) _close(fd);
2395 	}
2396 	return (-1);
2397 }
2398 
2399 int
2400 putlwpregs(thread_t tid, prgregset_t prp)
2401 {
2402 	extern ssize_t _writev(int, const struct iovec *, int);
2403 	char buf[100];
2404 	int fd;
2405 	long dstop_sreg[2];
2406 	long run_null[2];
2407 	iovec_t iov[3];
2408 
2409 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2410 	(void) strcpy(buf, "/proc/self/lwp/");
2411 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2412 	(void) strcat(buf, "/lwpctl");
2413 	if ((fd = _open(buf, O_WRONLY, 0)) >= 0) {
2414 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2415 		dstop_sreg[1] = PCSREG;		/* set the registers */
2416 		iov[0].iov_base = (caddr_t)dstop_sreg;
2417 		iov[0].iov_len = sizeof (dstop_sreg);
2418 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2419 		iov[1].iov_len = sizeof (prgregset_t);
2420 		run_null[0] = PCRUN;		/* make it runnable again */
2421 		run_null[1] = 0;
2422 		iov[2].iov_base = (caddr_t)run_null;
2423 		iov[2].iov_len = sizeof (run_null);
2424 		if (_writev(fd, iov, 3) >= 0) {
2425 			(void) _close(fd);
2426 			return (0);
2427 		}
2428 		(void) _close(fd);
2429 	}
2430 	return (-1);
2431 }
2432 
2433 static ulong_t
2434 gettsp_slow(thread_t tid)
2435 {
2436 	char buf[100];
2437 	struct lwpstatus status;
2438 
2439 	if (getlwpstatus(tid, &status) != 0) {
2440 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2441 		(void) strcpy(buf, "__gettsp(");
2442 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2443 		(void) strcat(buf, "): can't read lwpstatus");
2444 		thr_panic(buf);
2445 	}
2446 	return (status.pr_reg[R_SP]);
2447 }
2448 
2449 ulong_t
2450 __gettsp(thread_t tid)
2451 {
2452 	uberdata_t *udp = curthread->ul_uberdata;
2453 	ulwp_t *ulwp;
2454 	ulong_t result;
2455 
2456 	if ((ulwp = find_lwp(tid)) == NULL)
2457 		return (0);
2458 
2459 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2460 		ulwp_unlock(ulwp, udp);
2461 		return (result);
2462 	}
2463 
2464 	result = gettsp_slow(tid);
2465 	ulwp_unlock(ulwp, udp);
2466 	return (result);
2467 }
2468 
2469 /*
2470  * This tells java stack walkers how to find the ucontext
2471  * structure passed to signal handlers.
2472  */
2473 #pragma weak thr_sighndlrinfo = _thr_sighndlrinfo
2474 void
2475 _thr_sighndlrinfo(void (**func)(), int *funcsize)
2476 {
2477 	*func = &__sighndlr;
2478 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2479 }
2480 
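/*
 * Illustrative sketch (not library code) of how a stack walker uses
 * thr_sighndlrinfo(); 'pc' is a hypothetical saved program counter.
 *
 *	void (*func)();
 *	int size;
 *
 *	thr_sighndlrinfo(&func, &size);
 *	if ((uintptr_t)pc - (uintptr_t)func < (uintptr_t)size) {
 *		... pc is inside __sighndlr, so the ucontext passed to
 *		... the signal handler can be recovered from that frame
 *	}
 */
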
2481 /*
2482  * Mark a thread a mutator or reset a mutator to being a default,
2483  * non-mutator thread.
2484  */
2485 #pragma weak thr_setmutator = _thr_setmutator
2486 int
2487 _thr_setmutator(thread_t tid, int enabled)
2488 {
2489 	ulwp_t *self = curthread;
2490 	uberdata_t *udp = self->ul_uberdata;
2491 	ulwp_t *ulwp;
2492 	int error;
2493 
2494 	enabled = enabled ? 1 : 0;
2495 top:
2496 	if (tid == 0) {
2497 		ulwp = self;
2498 		ulwp_lock(ulwp, udp);
2499 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2500 		return (ESRCH);
2501 	}
2502 
2503 	/*
2504 	 * The target thread should be the caller itself or a suspended thread.
2505 	 * This prevents the target from also changing its ul_mutator field.
2506 	 */
2507 	error = 0;
2508 	if (ulwp != self && !ulwp->ul_stop && enabled)
2509 		error = EINVAL;
2510 	else if (ulwp->ul_mutator != enabled) {
2511 		lmutex_lock(&mutatorslock);
2512 		if (mutatorsbarrier) {
2513 			ulwp_unlock(ulwp, udp);
2514 			while (mutatorsbarrier)
2515 				(void) _cond_wait(&mutatorscv, &mutatorslock);
2516 			lmutex_unlock(&mutatorslock);
2517 			goto top;
2518 		}
2519 		ulwp->ul_mutator = enabled;
2520 		lmutex_unlock(&mutatorslock);
2521 	}
2522 
2523 	ulwp_unlock(ulwp, udp);
2524 	return (error);
2525 }
2526 
2527 /*
2528  * Establish a barrier against new mutators.  Any non-mutator trying
2529  * to become a mutator is suspended until the barrier is removed.
2530  */
2531 #pragma weak thr_mutators_barrier = _thr_mutators_barrier
2532 void
2533 _thr_mutators_barrier(int enabled)
2534 {
2535 	int oldvalue;
2536 
2537 	lmutex_lock(&mutatorslock);
2538 
2539 	/*
2540 	 * Wait if trying to set the barrier while it is already set.
2541 	 */
2542 	while (mutatorsbarrier && enabled)
2543 		(void) _cond_wait(&mutatorscv, &mutatorslock);
2544 
2545 	oldvalue = mutatorsbarrier;
2546 	mutatorsbarrier = enabled;
2547 	/*
2548 	 * Wakeup any blocked non-mutators when barrier is removed.
2549 	 * Wake up any blocked non-mutators when the barrier is removed.
2550 	if (oldvalue && !enabled)
2551 		(void) cond_broadcast_internal(&mutatorscv);
2552 	lmutex_unlock(&mutatorslock);
2553 }
2554 
2555 /*
2556  * Suspend all mutators except for the caller.  The list of all
2557  * threads is searched and every mutator found there is suspended,
2558  * whether it was actively running or not.  Actively running
2559  * non-mutators remain running.
2560  */
2561 #pragma weak thr_suspend_allmutators = _thr_suspend_allmutators
2562 int
2563 _thr_suspend_allmutators(void)
2564 {
2565 	ulwp_t *self = curthread;
2566 	uberdata_t *udp = self->ul_uberdata;
2567 	ulwp_t *ulwp;
2568 	int link_dropped;
2569 
2570 	/*
2571 	 * We single-thread the entire thread suspend mechanism.
2572 	 */
2573 	(void) fork_lock_enter(NULL);
2574 top:
2575 	lmutex_lock(&udp->link_lock);
2576 
2577 	if (suspendingallmutators || suspendedallmutators) {
2578 		lmutex_unlock(&udp->link_lock);
2579 		fork_lock_exit();
2580 		return (EINVAL);
2581 	}
2582 	suspendingallmutators = 1;
2583 
2584 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2585 		ulwp_lock(ulwp, udp);
2586 		if (!ulwp->ul_mutator) {
2587 			ulwp_unlock(ulwp, udp);
2588 		} else if (ulwp->ul_stop) {	/* already stopped */
2589 			ulwp->ul_stop |= TSTP_MUTATOR;
2590 			ulwp_broadcast(ulwp);
2591 			ulwp_unlock(ulwp, udp);
2592 		} else {
2593 			/*
2594 			 * Suspend the lwp and move it to a safe point.
2595 			 */
2596 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2597 			    link_dropped) {
2598 				suspendingallmutators = 0;
2599 				goto top;
2600 			}
2601 		}
2602 	}
2603 
2604 	suspendedallmutators = 1;
2605 	suspendingallmutators = 0;
2606 	lmutex_unlock(&udp->link_lock);
2607 	fork_lock_exit();
2608 	return (0);
2609 }
2610 
2611 /*
2612  * Suspend the target mutator.  The caller is permitted to suspend
2613  * itself.  If a mutator barrier is enabled, the caller will suspend
2614  * itself as though it had been suspended by thr_suspend_allmutators().
2615  * When the barrier is removed, this thread will be resumed.  Any
2616  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2617  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2618  */
2619 #pragma weak thr_suspend_mutator = _thr_suspend_mutator
2620 int
2621 _thr_suspend_mutator(thread_t tid)
2622 {
2623 	if (tid == 0)
2624 		tid = curthread->ul_lwpid;
2625 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2626 }
2627 
2628 /*
2629  * Resume the set of all suspended mutators.
2630  */
2631 #pragma weak thr_continue_allmutators = _thr_continue_allmutators
2632 int
2633 _thr_continue_allmutators()
2634 {
2635 	ulwp_t *self = curthread;
2636 	uberdata_t *udp = self->ul_uberdata;
2637 	ulwp_t *ulwp;
2638 
2639 	lmutex_lock(&udp->link_lock);
2640 	if (!suspendedallmutators) {
2641 		lmutex_unlock(&udp->link_lock);
2642 		return (EINVAL);
2643 	}
2644 	suspendedallmutators = 0;
2645 
2646 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2647 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2648 		lmutex_lock(mp);
2649 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2650 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2651 			ulwp_broadcast(ulwp);
2652 			if (!ulwp->ul_stop)
2653 				force_continue(ulwp);
2654 		}
2655 		lmutex_unlock(mp);
2656 	}
2657 
2658 	lmutex_unlock(&udp->link_lock);
2659 	return (0);
2660 }
2661 
2662 /*
2663  * Resume a suspended mutator.
2664  */
2665 #pragma weak thr_continue_mutator = _thr_continue_mutator
2666 int
2667 _thr_continue_mutator(thread_t tid)
2668 {
2669 	return (_thrp_continue(tid, TSTP_MUTATOR));
2670 }
2671 
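/*
 * Illustrative sketch (not library code) of how an old JVM garbage
 * collector would drive the mutator interfaces above; the stack and
 * register scanning routine is hypothetical.
 *
 *	thr_mutators_barrier(1);		(keep new mutators out)
 *	if (thr_suspend_allmutators() == 0) {
 *		for each mutator thread 'tid':
 *			int flag;
 *			stack_t ss;
 *			gregset_t regs;
 *			if (thr_getstate(tid, &flag, NULL, &ss, regs) == 0 &&
 *			    flag == TRS_NONVOLATILE)
 *				scan_stack_and_registers(&ss, regs);
 *		(void) thr_continue_allmutators();
 *	}
 *	thr_mutators_barrier(0);		(drop the barrier)
 */
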
2672 #pragma weak thr_wait_mutator = _thr_wait_mutator
2673 int
2674 _thr_wait_mutator(thread_t tid, int dontwait)
2675 {
2676 	uberdata_t *udp = curthread->ul_uberdata;
2677 	ulwp_t *ulwp;
2678 	int error = 0;
2679 
2680 top:
2681 	if ((ulwp = find_lwp(tid)) == NULL)
2682 		return (ESRCH);
2683 
2684 	if (!ulwp->ul_mutator)
2685 		error = EINVAL;
2686 	else if (dontwait) {
2687 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2688 			error = EWOULDBLOCK;
2689 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2690 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2691 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2692 
2693 		(void) _cond_wait(cvp, mp);
2694 		(void) lmutex_unlock(mp);
2695 		goto top;
2696 	}
2697 
2698 	ulwp_unlock(ulwp, udp);
2699 	return (error);
2700 }
2701 
2702 /* PROBE_SUPPORT begin */
2703 
2704 void
2705 thr_probe_setup(void *data)
2706 {
2707 	curthread->ul_tpdp = data;
2708 }
2709 
2710 static void *
2711 _thread_probe_getfunc()
2712 {
2713 	return (curthread->ul_tpdp);
2714 }
2715 
2716 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2717 
2718 /* ARGSUSED */
2719 void
2720 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2721 {
2722 	/* never called */
2723 }
2724 
2725 /* ARGSUSED */
2726 void
2727 _resume_ret(ulwp_t *oldlwp)
2728 {
2729 	/* never called */
2730 }
2731 
2732 /* PROBE_SUPPORT end */
2733