xref: /freebsd/sys/kern/kern_thread.c (revision a3cf0ef5a295c885c895fabfd56470c0d1db322d)
1 /*-
2  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
3  *  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_witness.h"
30 #include "opt_hwpmc_hooks.h"
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/proc.h>
41 #include <sys/resourcevar.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/sched.h>
45 #include <sys/sleepqueue.h>
46 #include <sys/selinfo.h>
47 #include <sys/turnstile.h>
48 #include <sys/ktr.h>
49 #include <sys/rwlock.h>
50 #include <sys/umtx.h>
51 #include <sys/cpuset.h>
52 #ifdef	HWPMC_HOOKS
53 #include <sys/pmckern.h>
54 #endif
55 
56 #include <security/audit/audit.h>
57 
58 #include <vm/vm.h>
59 #include <vm/vm_extern.h>
60 #include <vm/uma.h>
61 #include <sys/eventhandler.h>
62 
63 /*
64  * Thread-related storage.
65  */
66 static uma_zone_t thread_zone;
67 
68 SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
69 
70 int max_threads_per_proc = 1500;
71 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
72 	&max_threads_per_proc, 0, "Limit on threads per proc");
73 
74 int max_threads_hits;
75 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
76 	&max_threads_hits, 0, "");
77 
78 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
79 static struct mtx zombie_lock;
80 MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
81 
82 static void thread_zombie(struct thread *);
83 
84 struct mtx tid_lock;
85 static struct unrhdr *tid_unrhdr;
86 
87 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
88 
89 struct	tidhashhead *tidhashtbl;
90 u_long	tidhash;
91 struct	rwlock tidhash_lock;
92 
93 /*
94  * Prepare a thread for use.
95  */
96 static int
97 thread_ctor(void *mem, int size, void *arg, int flags)
98 {
99 	struct thread	*td;
100 
101 	td = (struct thread *)mem;
102 	td->td_state = TDS_INACTIVE;
103 	td->td_oncpu = NOCPU;
104 
105 	td->td_tid = alloc_unr(tid_unrhdr);
106 
107 	/*
108 	 * Note that td_critnest begins life as 1 because the thread is not
109 	 * running and is thereby implicitly waiting to be on the receiving
110 	 * end of a context switch.
111 	 */
112 	td->td_critnest = 1;
113 	EVENTHANDLER_INVOKE(thread_ctor, td);
114 #ifdef AUDIT
115 	audit_thread_alloc(td);
116 #endif
117 	umtx_thread_alloc(td);
118 	return (0);
119 }
120 
121 /*
122  * Reclaim a thread after use.
123  */
124 static void
125 thread_dtor(void *mem, int size, void *arg)
126 {
127 	struct thread *td;
128 
129 	td = (struct thread *)mem;
130 
131 #ifdef INVARIANTS
132 	/* Verify that this thread is in a safe state to free. */
133 	switch (td->td_state) {
134 	case TDS_INHIBITED:
135 	case TDS_RUNNING:
136 	case TDS_CAN_RUN:
137 	case TDS_RUNQ:
138 		/*
139 		 * We must never unlink a thread that is in one of
140 		 * these states, because it is currently active.
141 		 */
142 		panic("bad state for thread unlinking");
143 		/* NOTREACHED */
144 	case TDS_INACTIVE:
145 		break;
146 	default:
147 		panic("bad thread state");
148 		/* NOTREACHED */
149 	}
150 #endif
151 #ifdef AUDIT
152 	audit_thread_free(td);
153 #endif
154 	/* Free all OSD associated with this thread. */
155 	osd_thread_exit(td);
156 
157 	EVENTHANDLER_INVOKE(thread_dtor, td);
158 	free_unr(tid_unrhdr, td->td_tid);
159 }
160 
161 /*
162  * Initialize type-stable parts of a thread (when newly created).
163  */
164 static int
165 thread_init(void *mem, int size, int flags)
166 {
167 	struct thread *td;
168 
169 	td = (struct thread *)mem;
170 
171 	td->td_sleepqueue = sleepq_alloc();
172 	td->td_turnstile = turnstile_alloc();
173 	EVENTHANDLER_INVOKE(thread_init, td);
174 	td->td_sched = (struct td_sched *)&td[1];
175 	umtx_thread_init(td);
176 	td->td_kstack = 0;
177 	return (0);
178 }
179 
180 /*
181  * Tear down type-stable parts of a thread (just before being discarded).
182  */
183 static void
184 thread_fini(void *mem, int size)
185 {
186 	struct thread *td;
187 
188 	td = (struct thread *)mem;
189 	EVENTHANDLER_INVOKE(thread_fini, td);
190 	turnstile_free(td->td_turnstile);
191 	sleepq_free(td->td_sleepqueue);
192 	umtx_thread_fini(td);
193 	seltdfini(td);
194 }
195 
196 /*
197  * For a newly created process,
198  * link up all the structures and its initial threads, etc.
199  * Called from:
200  * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
201  * proc_dtor() (should go away)
202  * proc_init()
203  */
204 void
205 proc_linkup0(struct proc *p, struct thread *td)
206 {
207 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
208 	proc_linkup(p, td);
209 }
210 
211 void
212 proc_linkup(struct proc *p, struct thread *td)
213 {
214 
215 	sigqueue_init(&p->p_sigqueue, p);
216 	p->p_ksi = ksiginfo_alloc(1);
217 	if (p->p_ksi != NULL) {
218 		/* XXX p_ksi may be null if ksiginfo zone is not ready */
219 		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
220 	}
221 	LIST_INIT(&p->p_mqnotifier);
222 	p->p_numthreads = 0;
223 	thread_link(td, p);
224 }
225 
226 /*
227  * Initialize global thread allocation resources.
228  */
229 void
230 threadinit(void)
231 {
232 
233 	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
234 	/* leave one number for thread0 */
235 	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
236 
237 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
238 	    thread_ctor, thread_dtor, thread_init, thread_fini,
239 	    16 - 1, 0);
240 	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
241 	rw_init(&tidhash_lock, "tidhash");
242 }
243 
244 /*
245  * Place an unused thread on the zombie list.
246  * Use the slpq as that must be unused by now.
247  */
248 void
249 thread_zombie(struct thread *td)
250 {
251 	mtx_lock_spin(&zombie_lock);
252 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
253 	mtx_unlock_spin(&zombie_lock);
254 }
255 
256 /*
257  * Release a thread that has exited after cpu_throw().
258  */
259 void
260 thread_stash(struct thread *td)
261 {
262 	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
263 	thread_zombie(td);
264 }
265 
266 /*
267  * Reap zombie resources.
268  */
269 void
270 thread_reap(void)
271 {
272 	struct thread *td_first, *td_next;
273 
274 	/*
275 	 * Don't even bother to lock if none at this instant;
276 	 * we really don't care about the next instant.
277 	 */
278 	if (!TAILQ_EMPTY(&zombie_threads)) {
279 		mtx_lock_spin(&zombie_lock);
280 		td_first = TAILQ_FIRST(&zombie_threads);
281 		if (td_first)
282 			TAILQ_INIT(&zombie_threads);
283 		mtx_unlock_spin(&zombie_lock);
284 		while (td_first) {
285 			td_next = TAILQ_NEXT(td_first, td_slpq);
286 			if (td_first->td_ucred)
287 				crfree(td_first->td_ucred);
288 			thread_free(td_first);
289 			td_first = td_next;
290 		}
291 	}
292 }
293 
294 /*
295  * Allocate a thread.
296  */
297 struct thread *
298 thread_alloc(int pages)
299 {
300 	struct thread *td;
301 
302 	thread_reap(); /* check if any zombies to get */
303 
304 	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
305 	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
306 	if (!vm_thread_new(td, pages)) {
307 		uma_zfree(thread_zone, td);
308 		return (NULL);
309 	}
310 	cpu_thread_alloc(td);
311 	return (td);
312 }
313 
314 int
315 thread_alloc_stack(struct thread *td, int pages)
316 {
317 
318 	KASSERT(td->td_kstack == 0,
319 	    ("thread_alloc_stack called on a thread with kstack"));
320 	if (!vm_thread_new(td, pages))
321 		return (0);
322 	cpu_thread_alloc(td);
323 	return (1);
324 }
325 
326 /*
327  * Deallocate a thread.
328  */
329 void
330 thread_free(struct thread *td)
331 {
332 
333 	lock_profile_thread_exit(td);
334 	if (td->td_cpuset)
335 		cpuset_rel(td->td_cpuset);
336 	td->td_cpuset = NULL;
337 	cpu_thread_free(td);
338 	if (td->td_kstack != 0)
339 		vm_thread_dispose(td);
340 	uma_zfree(thread_zone, td);
341 }
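
/*
 * Usage sketch (hypothetical, not part of this file): a typical
 * thread-creation path pairs thread_alloc() with thread_link() and,
 * on a later failure, with thread_free().  The names "newtd" and "p"
 * are illustrative only.
 *
 *	newtd = thread_alloc(0);		0 = default kstack size
 *	if (newtd == NULL)
 *		return (ENOMEM);
 *	...arch- and ABI-specific setup of newtd...
 *	PROC_LOCK(p);
 *	thread_link(newtd, p);			attach to the process
 *	PROC_UNLOCK(p);
 */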
342 
343 /*
344  * Discard the current thread and exit from its context.
345  * Always called with scheduler locked.
346  *
347  * Because we can't free a thread while we're operating under its context,
348  * push the current thread into our CPU's deadthread holder. This means
349  * we needn't worry about someone else grabbing our context before we
350  * do a cpu_throw().
351  */
352 void
353 thread_exit(void)
354 {
355 	uint64_t new_switchtime;
356 	struct thread *td;
357 	struct thread *td2;
358 	struct proc *p;
359 	int wakeup_swapper;
360 
361 	td = curthread;
362 	p = td->td_proc;
363 
364 	PROC_SLOCK_ASSERT(p, MA_OWNED);
365 	mtx_assert(&Giant, MA_NOTOWNED);
366 
367 	PROC_LOCK_ASSERT(p, MA_OWNED);
368 	KASSERT(p != NULL, ("thread exiting without a process"));
369 	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
370 	    (long)p->p_pid, td->td_name);
371 	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
372 
373 #ifdef AUDIT
374 	AUDIT_SYSCALL_EXIT(0, td);
375 #endif
376 	umtx_thread_exit(td);
377 	/*
378 	 * Drop FPU & debug register state storage, or any other
379 	 * architecture-specific resources that
380 	 * would not be on a new untouched process.
381 	 */
382 	cpu_thread_exit(td);	/* XXXSMP */
383 
384 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
385 	new_switchtime = cpu_ticks();
386 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
387 	PCPU_SET(switchtime, new_switchtime);
388 	PCPU_SET(switchticks, ticks);
389 	PCPU_INC(cnt.v_swtch);
390 	/* Save our resource usage in our process. */
391 	td->td_ru.ru_nvcsw++;
392 	rucollect(&p->p_ru, &td->td_ru);
393 	/*
394 	 * The last thread is left attached to the process
395 	 * so that the whole bundle gets recycled. Skip
396 	 * all this stuff if we never had threads.
397 	 * EXIT clears all sign of other threads when
398 	 * it goes to single threading, so the last thread always
399 	 * takes the short path.
400 	 */
401 	if (p->p_flag & P_HADTHREADS) {
402 		if (p->p_numthreads > 1) {
403 			thread_unlink(td);
404 			td2 = FIRST_THREAD_IN_PROC(p);
405 			sched_exit_thread(td2, td);
406 
407 			/*
408 			 * The test below is NOT true if we are the
409 			 * sole exiting thread. P_STOPPED_SINGLE is unset
410 			 * in exit1() after it is the only survivor.
411 			 */
412 			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
413 				if (p->p_numthreads == p->p_suspcount) {
414 					thread_lock(p->p_singlethread);
415 					wakeup_swapper = thread_unsuspend_one(
416 						p->p_singlethread);
417 					thread_unlock(p->p_singlethread);
418 					if (wakeup_swapper)
419 						kick_proc0();
420 				}
421 			}
422 
423 			atomic_add_int(&td->td_proc->p_exitthreads, 1);
424 			PCPU_SET(deadthread, td);
425 		} else {
426 			/*
427 			 * The last thread is exiting, but not through exit().
428 			 */
429 			panic("thread_exit: Last thread exiting on its own");
430 		}
431 	}
432 #ifdef	HWPMC_HOOKS
433 	/*
434 	 * If this thread is part of a process that is being tracked by hwpmc(4),
435 	 * inform the module of the thread's impending exit.
436 	 */
437 	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
438 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
439 #endif
440 	PROC_UNLOCK(p);
441 	ruxagg(p, td);
442 	thread_lock(td);
443 	PROC_SUNLOCK(p);
444 	td->td_state = TDS_INACTIVE;
445 #ifdef WITNESS
446 	witness_thread_exit(td);
447 #endif
448 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
449 	sched_throw(td);
450 	panic("I'm a teapot!");
451 	/* NOTREACHED */
452 }
453 
454 /*
455  * Do any thread-specific cleanups that may be needed in wait().
456  * Called with Giant, proc and schedlock not held.
457  */
458 void
459 thread_wait(struct proc *p)
460 {
461 	struct thread *td;
462 
463 	mtx_assert(&Giant, MA_NOTOWNED);
464 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
465 	td = FIRST_THREAD_IN_PROC(p);
466 	/* Lock the last thread so we spin until it exits cpu_throw(). */
467 	thread_lock(td);
468 	thread_unlock(td);
469 	/* Wait for any remaining threads to exit cpu_throw(). */
470 	while (p->p_exitthreads)
471 		sched_relinquish(curthread);
472 	lock_profile_thread_exit(td);
473 	cpuset_rel(td->td_cpuset);
474 	td->td_cpuset = NULL;
475 	cpu_thread_clean(td);
476 	crfree(td->td_ucred);
477 	thread_reap();	/* check for zombie threads etc. */
478 }
479 
480 /*
481  * Link a thread to a process.
482  * Set up anything that needs to be initialized for it to
483  * be used by the process.
484  */
485 void
486 thread_link(struct thread *td, struct proc *p)
487 {
488 
489 	/*
490 	 * XXX This can't be enabled because it's called for proc0 before
491 	 * its lock has been created.
492 	 * PROC_LOCK_ASSERT(p, MA_OWNED);
493 	 */
494 	td->td_state    = TDS_INACTIVE;
495 	td->td_proc     = p;
496 	td->td_flags    = TDF_INMEM;
497 
498 	LIST_INIT(&td->td_contested);
499 	LIST_INIT(&td->td_lprof[0]);
500 	LIST_INIT(&td->td_lprof[1]);
501 	sigqueue_init(&td->td_sigqueue, p);
502 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
503 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
504 	p->p_numthreads++;
505 }
506 
507 /*
508  * Convert a process with one thread to an unthreaded process.
509  */
510 void
511 thread_unthread(struct thread *td)
512 {
513 	struct proc *p = td->td_proc;
514 
515 	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
516 	p->p_flag &= ~P_HADTHREADS;
517 }
518 
519 /*
520  * Called from:
521  *  thread_exit()
522  */
523 void
524 thread_unlink(struct thread *td)
525 {
526 	struct proc *p = td->td_proc;
527 
528 	PROC_LOCK_ASSERT(p, MA_OWNED);
529 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
530 	p->p_numthreads--;
531 	/* could clear a few other things here */
532 	/* Must  NOT clear links to proc! */
533 }
534 
535 static int
536 calc_remaining(struct proc *p, int mode)
537 {
538 	int remaining;
539 
540 	if (mode == SINGLE_EXIT)
541 		remaining = p->p_numthreads;
542 	else if (mode == SINGLE_BOUNDARY)
543 		remaining = p->p_numthreads - p->p_boundary_count;
544 	else if (mode == SINGLE_NO_EXIT)
545 		remaining = p->p_numthreads - p->p_suspcount;
546 	else
547 		panic("calc_remaining: wrong mode %d", mode);
548 	return (remaining);
549 }
550 
551 /*
552  * Enforce single-threading.
553  *
554  * Returns 1 if the caller must abort (another thread is waiting to
555  * exit the process or similar). Process is locked!
556  * Returns 0 when you are successfully the only thread running.
557  * A process has successfully single threaded in the suspend mode when
558  * there are no threads in user mode. Threads in the kernel must be
559  * allowed to continue until they get to the user boundary. They may even
560  * copy out their return values and data before suspending. They may,
561  * however, be accelerated in reaching the user boundary as we will wake
562  * up any sleeping threads that are interruptible (PCATCH).
563  */
564 int
565 thread_single(int mode)
566 {
567 	struct thread *td;
568 	struct thread *td2;
569 	struct proc *p;
570 	int remaining, wakeup_swapper;
571 
572 	td = curthread;
573 	p = td->td_proc;
574 	mtx_assert(&Giant, MA_NOTOWNED);
575 	PROC_LOCK_ASSERT(p, MA_OWNED);
576 	KASSERT((td != NULL), ("curthread is NULL"));
577 
578 	if ((p->p_flag & P_HADTHREADS) == 0)
579 		return (0);
580 
581 	/* Is someone already single threading? */
582 	if (p->p_singlethread != NULL && p->p_singlethread != td)
583 		return (1);
584 
585 	if (mode == SINGLE_EXIT) {
586 		p->p_flag |= P_SINGLE_EXIT;
587 		p->p_flag &= ~P_SINGLE_BOUNDARY;
588 	} else {
589 		p->p_flag &= ~P_SINGLE_EXIT;
590 		if (mode == SINGLE_BOUNDARY)
591 			p->p_flag |= P_SINGLE_BOUNDARY;
592 		else
593 			p->p_flag &= ~P_SINGLE_BOUNDARY;
594 	}
595 	p->p_flag |= P_STOPPED_SINGLE;
596 	PROC_SLOCK(p);
597 	p->p_singlethread = td;
598 	remaining = calc_remaining(p, mode);
599 	while (remaining != 1) {
600 		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
601 			goto stopme;
602 		wakeup_swapper = 0;
603 		FOREACH_THREAD_IN_PROC(p, td2) {
604 			if (td2 == td)
605 				continue;
606 			thread_lock(td2);
607 			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
608 			if (TD_IS_INHIBITED(td2)) {
609 				switch (mode) {
610 				case SINGLE_EXIT:
611 					if (TD_IS_SUSPENDED(td2))
612 						wakeup_swapper |=
613 						    thread_unsuspend_one(td2);
614 					if (TD_ON_SLEEPQ(td2) &&
615 					    (td2->td_flags & TDF_SINTR))
616 						wakeup_swapper |=
617 						    sleepq_abort(td2, EINTR);
618 					break;
619 				case SINGLE_BOUNDARY:
620 					if (TD_IS_SUSPENDED(td2) &&
621 					    !(td2->td_flags & TDF_BOUNDARY))
622 						wakeup_swapper |=
623 						    thread_unsuspend_one(td2);
624 					if (TD_ON_SLEEPQ(td2) &&
625 					    (td2->td_flags & TDF_SINTR))
626 						wakeup_swapper |=
627 						    sleepq_abort(td2, ERESTART);
628 					break;
629 				case SINGLE_NO_EXIT:
630 					if (TD_IS_SUSPENDED(td2) &&
631 					    !(td2->td_flags & TDF_BOUNDARY))
632 						wakeup_swapper |=
633 						    thread_unsuspend_one(td2);
634 					if (TD_ON_SLEEPQ(td2) &&
635 					    (td2->td_flags & TDF_SINTR))
636 						wakeup_swapper |=
637 						    sleepq_abort(td2, ERESTART);
638 					break;
639 				default:
640 					break;
641 				}
642 			}
643 #ifdef SMP
644 			else if (TD_IS_RUNNING(td2) && td != td2) {
645 				forward_signal(td2);
646 			}
647 #endif
648 			thread_unlock(td2);
649 		}
650 		if (wakeup_swapper)
651 			kick_proc0();
652 		remaining = calc_remaining(p, mode);
653 
654 		/*
655 		 * Maybe we suspended some threads; was it enough?
656 		 */
657 		if (remaining == 1)
658 			break;
659 
660 stopme:
661 		/*
662 		 * Wake us up when everyone else has suspended.
663 		 * In the meantime we suspend as well.
664 		 */
665 		thread_suspend_switch(td);
666 		remaining = calc_remaining(p, mode);
667 	}
668 	if (mode == SINGLE_EXIT) {
669 		/*
670 		 * We have gotten rid of all the other threads and we
671 		 * are about to either exit or exec. In either case,
672 		 * we try our utmost to revert to being a non-threaded
673 		 * process.
674 		 */
675 		p->p_singlethread = NULL;
676 		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
677 		thread_unthread(td);
678 	}
679 	PROC_SUNLOCK(p);
680 	return (0);
681 }
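
/*
 * Caller sketch (hypothetical, not part of this file): exec- and
 * exit-like paths request single threading with the proc lock held
 * and back out if another thread is already single threading.
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_BOUNDARY)) {
 *		PROC_UNLOCK(p);
 *		return (ERESTART);		another thread won the race
 *	}
 *	...we are now effectively the only thread in user mode...
 *	thread_single_end();			or proceed to exit/exec
 *	PROC_UNLOCK(p);
 */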
682 
683 /*
684  * Called from locations that can safely check to see
685  * whether we have to suspend or at least throttle for a
686  * single-thread event (e.g. fork).
687  *
688  * Such locations include userret().
689  * If the "return_instead" argument is nonzero, the thread must be able to
690  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
691  *
692  * The 'return_instead' argument tells the function if it may do a
693  * thread_exit() or suspend, or whether the caller must abort and back
694  * out instead.
695  *
696  * If the thread that set the single_threading request has set the
697  * P_SINGLE_EXIT bit in the process flags then this call will never return
698  * if 'return_instead' is false, but will exit.
699  *
700  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
701  *---------------+--------------------+---------------------
702  *       0       | returns 0          |   returns 0 or 1
703  *               | when ST ends       |   immediately
704  *---------------+--------------------+---------------------
705  *       1       | thread exits       |   returns 1
706  *               |                    |  immediately
707  * 0 = thread_exit() or suspension ok,
708  * other = return error instead of stopping the thread.
709  *
710  * While a full suspension is in effect, even a single threading
711  * thread would be suspended if it made this call (but it shouldn't).
712  * This call should only be made from places where
713  * thread_exit() would be safe as that may be the outcome unless
714  * return_instead is set.
715  */
716 int
717 thread_suspend_check(int return_instead)
718 {
719 	struct thread *td;
720 	struct proc *p;
721 	int wakeup_swapper;
722 
723 	td = curthread;
724 	p = td->td_proc;
725 	mtx_assert(&Giant, MA_NOTOWNED);
726 	PROC_LOCK_ASSERT(p, MA_OWNED);
727 	while (P_SHOULDSTOP(p) ||
728 	      ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
729 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
730 			KASSERT(p->p_singlethread != NULL,
731 			    ("singlethread not set"));
732 			/*
733 			 * The only suspension in action is a
734 			 * single-threading. Single threader need not stop.
735 			 * XXX Should be safe to access unlocked
736 			 * as it can only be set to be true by us.
737 			 */
738 			if (p->p_singlethread == td)
739 				return (0);	/* Exempt from stopping. */
740 		}
741 		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
742 			return (EINTR);
743 
744 		/* Should we go to the user boundary if we didn't come from there? */
745 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
746 		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
747 			return (ERESTART);
748 
749 		/*
750 		 * If the process is waiting for us to exit,
751 		 * this thread should just suicide.
752 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
753 		 */
754 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
755 			PROC_UNLOCK(p);
756 			tidhash_remove(td);
757 			PROC_LOCK(p);
758 			tdsigcleanup(td);
759 			PROC_SLOCK(p);
760 			thread_stopped(p);
761 			thread_exit();
762 		}
763 
764 		PROC_SLOCK(p);
765 		thread_stopped(p);
766 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
767 			if (p->p_numthreads == p->p_suspcount + 1) {
768 				thread_lock(p->p_singlethread);
769 				wakeup_swapper =
770 				    thread_unsuspend_one(p->p_singlethread);
771 				thread_unlock(p->p_singlethread);
772 				if (wakeup_swapper)
773 					kick_proc0();
774 			}
775 		}
776 		PROC_UNLOCK(p);
777 		thread_lock(td);
778 		/*
779 		 * When a thread suspends, it just
780 		 * gets taken off all queues.
781 		 */
782 		thread_suspend_one(td);
783 		if (return_instead == 0) {
784 			p->p_boundary_count++;
785 			td->td_flags |= TDF_BOUNDARY;
786 		}
787 		PROC_SUNLOCK(p);
788 		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
789 		if (return_instead == 0)
790 			td->td_flags &= ~TDF_BOUNDARY;
791 		thread_unlock(td);
792 		PROC_LOCK(p);
793 		if (return_instead == 0)
794 			p->p_boundary_count--;
795 	}
796 	return (0);
797 }
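
/*
 * Caller sketch (hypothetical, not part of this file) for the two
 * conventions in the table above.  At the user boundary, suspension
 * or exit is acceptable, so return_instead is 0:
 *
 *	PROC_LOCK(p);
 *	thread_suspend_check(0);		may stop or never return
 *	PROC_UNLOCK(p);
 *
 * Deep in the kernel, where the caller must unwind its own state
 * first, return_instead is nonzero and the error is propagated:
 *
 *	PROC_LOCK(p);
 *	error = thread_suspend_check(1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (error);			EINTR or ERESTART
 */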
798 
799 void
800 thread_suspend_switch(struct thread *td)
801 {
802 	struct proc *p;
803 
804 	p = td->td_proc;
805 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
806 	PROC_LOCK_ASSERT(p, MA_OWNED);
807 	PROC_SLOCK_ASSERT(p, MA_OWNED);
808 	/*
809 	 * We implement thread_suspend_one in stages here to avoid
810 	 * dropping the proc lock while the thread lock is owned.
811 	 */
812 	thread_stopped(p);
813 	p->p_suspcount++;
814 	PROC_UNLOCK(p);
815 	thread_lock(td);
816 	td->td_flags &= ~TDF_NEEDSUSPCHK;
817 	TD_SET_SUSPENDED(td);
818 	sched_sleep(td, 0);
819 	PROC_SUNLOCK(p);
820 	DROP_GIANT();
821 	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
822 	thread_unlock(td);
823 	PICKUP_GIANT();
824 	PROC_LOCK(p);
825 	PROC_SLOCK(p);
826 }
827 
828 void
829 thread_suspend_one(struct thread *td)
830 {
831 	struct proc *p = td->td_proc;
832 
833 	PROC_SLOCK_ASSERT(p, MA_OWNED);
834 	THREAD_LOCK_ASSERT(td, MA_OWNED);
835 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
836 	p->p_suspcount++;
837 	td->td_flags &= ~TDF_NEEDSUSPCHK;
838 	TD_SET_SUSPENDED(td);
839 	sched_sleep(td, 0);
840 }
841 
842 int
843 thread_unsuspend_one(struct thread *td)
844 {
845 	struct proc *p = td->td_proc;
846 
847 	PROC_SLOCK_ASSERT(p, MA_OWNED);
848 	THREAD_LOCK_ASSERT(td, MA_OWNED);
849 	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
850 	TD_CLR_SUSPENDED(td);
851 	p->p_suspcount--;
852 	return (setrunnable(td));
853 }
854 
855 /*
856  * Allow all threads blocked by single threading to continue running.
857  */
858 void
859 thread_unsuspend(struct proc *p)
860 {
861 	struct thread *td;
862 	int wakeup_swapper;
863 
864 	PROC_LOCK_ASSERT(p, MA_OWNED);
865 	PROC_SLOCK_ASSERT(p, MA_OWNED);
866 	wakeup_swapper = 0;
867 	if (!P_SHOULDSTOP(p)) {
868                 FOREACH_THREAD_IN_PROC(p, td) {
869 			thread_lock(td);
870 			if (TD_IS_SUSPENDED(td)) {
871 				wakeup_swapper |= thread_unsuspend_one(td);
872 			}
873 			thread_unlock(td);
874 		}
875 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
876 	    (p->p_numthreads == p->p_suspcount)) {
877 		/*
878 		 * Stopping everything also did the job for the single
879 		 * threading request. Now we've downgraded to single-threaded;
880 		 * let it continue.
881 		 */
882 		thread_lock(p->p_singlethread);
883 		wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
884 		thread_unlock(p->p_singlethread);
885 	}
886 	if (wakeup_swapper)
887 		kick_proc0();
888 }
889 
890 /*
891  * End the single threading mode.
892  */
893 void
894 thread_single_end(void)
895 {
896 	struct thread *td;
897 	struct proc *p;
898 	int wakeup_swapper;
899 
900 	td = curthread;
901 	p = td->td_proc;
902 	PROC_LOCK_ASSERT(p, MA_OWNED);
903 	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
904 	PROC_SLOCK(p);
905 	p->p_singlethread = NULL;
906 	wakeup_swapper = 0;
907 	/*
908 	 * If there are other threads, they may now run,
909 	 * unless of course there is a blanket 'stop order'
910 	 * on the process. The single threader must be allowed
911 	 * to continue, however, as this is a bad place to stop.
912 	 */
913 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
914                 FOREACH_THREAD_IN_PROC(p, td) {
915 			thread_lock(td);
916 			if (TD_IS_SUSPENDED(td)) {
917 				wakeup_swapper |= thread_unsuspend_one(td);
918 			}
919 			thread_unlock(td);
920 		}
921 	}
922 	PROC_SUNLOCK(p);
923 	if (wakeup_swapper)
924 		kick_proc0();
925 }
926 
927 struct thread *
928 thread_find(struct proc *p, lwpid_t tid)
929 {
930 	struct thread *td;
931 
932 	PROC_LOCK_ASSERT(p, MA_OWNED);
933 	FOREACH_THREAD_IN_PROC(p, td) {
934 		if (td->td_tid == tid)
935 			break;
936 	}
937 	return (td);
938 }
939 
940 /* Locate a thread by number; return with proc lock held. */
941 struct thread *
942 tdfind(lwpid_t tid, pid_t pid)
943 {
944 #define RUN_THRESH	16
945 	struct thread *td;
946 	int run = 0;
947 
948 	rw_rlock(&tidhash_lock);
949 	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
950 		if (td->td_tid == tid) {
951 			if (pid != -1 && td->td_proc->p_pid != pid) {
952 				td = NULL;
953 				break;
954 			}
955 			if (td->td_proc->p_state == PRS_NEW) {
956 				td = NULL;
957 				break;
958 			}
959 			if (run > RUN_THRESH) {
960 				if (rw_try_upgrade(&tidhash_lock)) {
961 					LIST_REMOVE(td, td_hash);
962 					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
963 						td, td_hash);
964 					PROC_LOCK(td->td_proc);
965 					rw_wunlock(&tidhash_lock);
966 					return (td);
967 				}
968 			}
969 			PROC_LOCK(td->td_proc);
970 			break;
971 		}
972 		run++;
973 	}
974 	rw_runlock(&tidhash_lock);
975 	return (td);
976 }
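
/*
 * Lookup sketch (hypothetical, not part of this file): tdfind()
 * returns with the containing process locked, so the caller owns the
 * PROC_UNLOCK().  A pid of -1 matches a thread in any process.
 *
 *	td = tdfind(tid, -1);
 *	if (td == NULL)
 *		return (ESRCH);
 *	...use td and td->td_proc...
 *	PROC_UNLOCK(td->td_proc);
 */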
977 
978 void
979 tidhash_add(struct thread *td)
980 {
981 	rw_wlock(&tidhash_lock);
982 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
983 	rw_wunlock(&tidhash_lock);
984 }
985 
986 void
987 tidhash_remove(struct thread *td)
988 {
989 	rw_wlock(&tidhash_lock);
990 	LIST_REMOVE(td, td_hash);
991 	rw_wunlock(&tidhash_lock);
992 }
993