xref: /freebsd/sys/kern/kern_thread.c (revision e39e854e27f53a784c3982cbeb68f4ad1cfd9162)
1 /*-
2  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
3  *  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_witness.h"
30 #include "opt_kdtrace.h"
31 #include "opt_hwpmc_hooks.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/resourcevar.h>
43 #include <sys/sdt.h>
44 #include <sys/smp.h>
45 #include <sys/sched.h>
46 #include <sys/sleepqueue.h>
47 #include <sys/selinfo.h>
48 #include <sys/turnstile.h>
49 #include <sys/ktr.h>
50 #include <sys/rwlock.h>
51 #include <sys/umtx.h>
52 #include <sys/cpuset.h>
53 #ifdef	HWPMC_HOOKS
54 #include <sys/pmckern.h>
55 #endif
56 
57 #include <security/audit/audit.h>
58 
59 #include <vm/vm.h>
60 #include <vm/vm_extern.h>
61 #include <vm/uma.h>
62 #include <sys/eventhandler.h>
63 
64 SDT_PROVIDER_DECLARE(proc);
65 SDT_PROBE_DEFINE(proc, , , lwp_exit, lwp-exit);
66 
67 
68 /*
69  * Thread-related storage.
70  */
71 static uma_zone_t thread_zone;
72 
73 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
74 static struct mtx zombie_lock;
75 MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
76 
77 static void thread_zombie(struct thread *);
78 
79 #define TID_BUFFER_SIZE	1024
80 
81 struct mtx tid_lock;
82 static struct unrhdr *tid_unrhdr;
83 static lwpid_t tid_buffer[TID_BUFFER_SIZE];
84 static int tid_head, tid_tail;
85 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
86 
87 struct	tidhashhead *tidhashtbl;
88 u_long	tidhash;
89 struct	rwlock tidhash_lock;
90 
91 static lwpid_t
92 tid_alloc(void)
93 {
94 	lwpid_t	tid;
95 
96 	tid = alloc_unr(tid_unrhdr);
97 	if (tid != -1)
98 		return (tid);
99 	mtx_lock(&tid_lock);
100 	if (tid_head == tid_tail) {
101 		mtx_unlock(&tid_lock);
102 		return (-1);
103 	}
104 	tid = tid_buffer[tid_head++];
105 	tid_head %= TID_BUFFER_SIZE;
106 	mtx_unlock(&tid_lock);
107 	return (tid);
108 }
109 
110 static void
111 tid_free(lwpid_t tid)
112 {
113 	lwpid_t tmp_tid = -1;
114 
115 	mtx_lock(&tid_lock);
116 	if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
117 		tmp_tid = tid_buffer[tid_head];
118 		tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
119 	}
120 	tid_buffer[tid_tail++] = tid;
121 	tid_tail %= TID_BUFFER_SIZE;
122 	mtx_unlock(&tid_lock);
123 	if (tmp_tid != -1)
124 		free_unr(tid_unrhdr, tmp_tid);
125 }
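
/*
 * Illustrative note on the helpers above: tid_free() does not hand a TID
 * straight back to tid_unrhdr.  Freed TIDs are parked in tid_buffer, a
 * TID_BUFFER_SIZE-entry ring protected by tid_lock, and only the oldest
 * parked entry is returned via free_unr() once the ring fills up, which
 * delays the reuse of recently freed thread IDs.  A sketch of how the two
 * helpers pair up (hypothetical caller; the real consumers are
 * thread_ctor() and thread_dtor() below):
 *
 *	lwpid_t tid;
 *
 *	tid = tid_alloc();
 *	if (tid == -1)
 *		return (ENOMEM);
 *	...
 *	tid_free(tid);
 *
 * where ENOMEM is just one hypothetical way for a caller to report
 * allocator exhaustion.
 */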
126 
127 /*
128  * Prepare a thread for use.
129  */
130 static int
131 thread_ctor(void *mem, int size, void *arg, int flags)
132 {
133 	struct thread	*td;
134 
135 	td = (struct thread *)mem;
136 	td->td_state = TDS_INACTIVE;
137 	td->td_oncpu = NOCPU;
138 
139 	td->td_tid = tid_alloc();
140 
141 	/*
142 	 * Note that td_critnest begins life as 1 because the thread is not
143 	 * running and is thereby implicitly waiting to be on the receiving
144 	 * end of a context switch.
145 	 */
146 	td->td_critnest = 1;
147 	td->td_lend_user_pri = PRI_MAX;
148 	EVENTHANDLER_INVOKE(thread_ctor, td);
149 #ifdef AUDIT
150 	audit_thread_alloc(td);
151 #endif
152 	umtx_thread_alloc(td);
153 	return (0);
154 }
155 
156 /*
157  * Reclaim a thread after use.
158  */
159 static void
160 thread_dtor(void *mem, int size, void *arg)
161 {
162 	struct thread *td;
163 
164 	td = (struct thread *)mem;
165 
166 #ifdef INVARIANTS
167 	/* Verify that this thread is in a safe state to free. */
168 	switch (td->td_state) {
169 	case TDS_INHIBITED:
170 	case TDS_RUNNING:
171 	case TDS_CAN_RUN:
172 	case TDS_RUNQ:
173 		/*
174 		 * We must never unlink a thread that is in one of
175 		 * these states, because it is currently active.
176 		 */
177 		panic("bad state for thread unlinking");
178 		/* NOTREACHED */
179 	case TDS_INACTIVE:
180 		break;
181 	default:
182 		panic("bad thread state");
183 		/* NOTREACHED */
184 	}
185 #endif
186 #ifdef AUDIT
187 	audit_thread_free(td);
188 #endif
189 	/* Free all OSD associated with this thread. */
190 	osd_thread_exit(td);
191 
192 	EVENTHANDLER_INVOKE(thread_dtor, td);
193 	tid_free(td->td_tid);
194 }
195 
196 /*
197  * Initialize type-stable parts of a thread (when newly created).
198  */
199 static int
200 thread_init(void *mem, int size, int flags)
201 {
202 	struct thread *td;
203 
204 	td = (struct thread *)mem;
205 
206 	td->td_sleepqueue = sleepq_alloc();
207 	td->td_turnstile = turnstile_alloc();
208 	EVENTHANDLER_INVOKE(thread_init, td);
209 	td->td_sched = (struct td_sched *)&td[1];
210 	umtx_thread_init(td);
211 	td->td_kstack = 0;
212 	return (0);
213 }
214 
215 /*
216  * Tear down type-stable parts of a thread (just before being discarded).
217  */
218 static void
219 thread_fini(void *mem, int size)
220 {
221 	struct thread *td;
222 
223 	td = (struct thread *)mem;
224 	EVENTHANDLER_INVOKE(thread_fini, td);
225 	turnstile_free(td->td_turnstile);
226 	sleepq_free(td->td_sleepqueue);
227 	umtx_thread_fini(td);
228 	seltdfini(td);
229 }
230 
231 /*
232  * For a newly created process,
233  * link up all the structures and its initial threads etc.
234  * Called from:
235  * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
236  * proc_dtor() (should go away)
237  * proc_init()
238  */
239 void
240 proc_linkup0(struct proc *p, struct thread *td)
241 {
242 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
243 	proc_linkup(p, td);
244 }
245 
246 void
247 proc_linkup(struct proc *p, struct thread *td)
248 {
249 
250 	sigqueue_init(&p->p_sigqueue, p);
251 	p->p_ksi = ksiginfo_alloc(1);
252 	if (p->p_ksi != NULL) {
253 		/* XXX p_ksi may be null if ksiginfo zone is not ready */
254 		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
255 	}
256 	LIST_INIT(&p->p_mqnotifier);
257 	p->p_numthreads = 0;
258 	thread_link(td, p);
259 }
260 
261 /*
262  * Initialize global thread allocation resources.
263  */
264 void
265 threadinit(void)
266 {
267 
268 	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
269 	/* leave one number for thread0 */
270 	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
271 
272 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
273 	    thread_ctor, thread_dtor, thread_init, thread_fini,
274 	    16 - 1, 0);
275 	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
276 	rw_init(&tidhash_lock, "tidhash");
277 }
278 
279 /*
280  * Place an unused thread on the zombie list.
281  * Use the slpq as that must be unused by now.
282  */
283 void
284 thread_zombie(struct thread *td)
285 {
286 	mtx_lock_spin(&zombie_lock);
287 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
288 	mtx_unlock_spin(&zombie_lock);
289 }
290 
291 /*
292  * Release a thread that has exited after cpu_throw().
293  */
294 void
295 thread_stash(struct thread *td)
296 {
297 	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
298 	thread_zombie(td);
299 }
300 
301 /*
302  * Reap zombie resources.
303  */
304 void
305 thread_reap(void)
306 {
307 	struct thread *td_first, *td_next;
308 
309 	/*
310 	 * Don't even bother to lock if there are none at this instant;
311 	 * we really don't care about the next instant.
312 	 */
313 	if (!TAILQ_EMPTY(&zombie_threads)) {
314 		mtx_lock_spin(&zombie_lock);
315 		td_first = TAILQ_FIRST(&zombie_threads);
316 		if (td_first)
317 			TAILQ_INIT(&zombie_threads);
318 		mtx_unlock_spin(&zombie_lock);
319 		while (td_first) {
320 			td_next = TAILQ_NEXT(td_first, td_slpq);
321 			if (td_first->td_ucred)
322 				crfree(td_first->td_ucred);
323 			thread_free(td_first);
324 			td_first = td_next;
325 		}
326 	}
327 }
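
/*
 * Illustrative summary of the zombie path above: an exiting thread cannot
 * free itself, so it is pushed onto zombie_threads by thread_zombie() (or
 * thread_stash(), which also decrements p_exitthreads) using td_slpq, which
 * is guaranteed idle by that point.  The actual thread_free() happens later
 * in thread_reap(), called from contexts that may safely block; in
 * particular thread_alloc() reaps first, so allocating a thread doubles as
 * garbage collection of dead ones.
 */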
328 
329 /*
330  * Allocate a thread.
331  */
332 struct thread *
333 thread_alloc(int pages)
334 {
335 	struct thread *td;
336 
337 	thread_reap(); /* check if any zombies to get */
338 
339 	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
340 	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
341 	if (!vm_thread_new(td, pages)) {
342 		uma_zfree(thread_zone, td);
343 		return (NULL);
344 	}
345 	cpu_thread_alloc(td);
346 	return (td);
347 }
348 
349 int
350 thread_alloc_stack(struct thread *td, int pages)
351 {
352 
353 	KASSERT(td->td_kstack == 0,
354 	    ("thread_alloc_stack called on a thread with kstack"));
355 	if (!vm_thread_new(td, pages))
356 		return (0);
357 	cpu_thread_alloc(td);
358 	return (1);
359 }
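
/*
 * Usage sketch for thread_alloc()/thread_free() (illustrative, hypothetical
 * caller; "pages" selects the kernel stack size and 0 conventionally
 * requests the default from vm_thread_new()):
 *
 *	struct thread *newtd;
 *
 *	newtd = thread_alloc(0);
 *	if (newtd == NULL)
 *		return (ENOMEM);
 *	...initialize it, then thread_link() it into a process...
 *
 * and a thread that never gets linked (or has been reaped) is released
 * again with thread_free().
 */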
360 
361 /*
362  * Deallocate a thread.
363  */
364 void
365 thread_free(struct thread *td)
366 {
367 
368 	lock_profile_thread_exit(td);
369 	if (td->td_cpuset)
370 		cpuset_rel(td->td_cpuset);
371 	td->td_cpuset = NULL;
372 	cpu_thread_free(td);
373 	if (td->td_kstack != 0)
374 		vm_thread_dispose(td);
375 	uma_zfree(thread_zone, td);
376 }
377 
378 /*
379  * Discard the current thread and exit from its context.
380  * Always called with scheduler locked.
381  *
382  * Because we can't free a thread while we're operating under its context,
383  * push the current thread into our CPU's deadthread holder. This means
384  * we needn't worry about someone else grabbing our context before we
385  * do a cpu_throw().
386  */
387 void
388 thread_exit(void)
389 {
390 	uint64_t runtime, new_switchtime;
391 	struct thread *td;
392 	struct thread *td2;
393 	struct proc *p;
394 	int wakeup_swapper;
395 
396 	td = curthread;
397 	p = td->td_proc;
398 
399 	PROC_SLOCK_ASSERT(p, MA_OWNED);
400 	mtx_assert(&Giant, MA_NOTOWNED);
401 
402 	PROC_LOCK_ASSERT(p, MA_OWNED);
403 	KASSERT(p != NULL, ("thread exiting without a process"));
404 	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
405 	    (long)p->p_pid, td->td_name);
406 	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
407 
408 #ifdef AUDIT
409 	AUDIT_SYSCALL_EXIT(0, td);
410 #endif
411 	umtx_thread_exit(td);
412 	/*
413 	 * drop FPU & debug register state storage, or any other
414 	 * architecture specific resources that
415 	 * would not be on a new untouched process.
416 	 */
417 	cpu_thread_exit(td);	/* XXXSMP */
418 
419 	/*
420 	 * The last thread is left attached to the process
421 	 * so that the whole bundle gets recycled. Skip
422 	 * all this stuff if we never had threads.
423 	 * EXIT clears all signs of other threads when
424 	 * it goes to single threading, so the last thread always
425 	 * takes the short path.
426 	 */
427 	if (p->p_flag & P_HADTHREADS) {
428 		if (p->p_numthreads > 1) {
429 			thread_unlink(td);
430 			td2 = FIRST_THREAD_IN_PROC(p);
431 			sched_exit_thread(td2, td);
432 
433 			/*
434 			 * The test below is NOT true if we are the
435 			 * sole exiting thread. P_STOPPED_SINGLE is unset
436 			 * in exit1() after it is the only survivor.
437 			 */
438 			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
439 				if (p->p_numthreads == p->p_suspcount) {
440 					thread_lock(p->p_singlethread);
441 					wakeup_swapper = thread_unsuspend_one(
442 						p->p_singlethread);
443 					thread_unlock(p->p_singlethread);
444 					if (wakeup_swapper)
445 						kick_proc0();
446 				}
447 			}
448 
449 			atomic_add_int(&td->td_proc->p_exitthreads, 1);
450 			PCPU_SET(deadthread, td);
451 		} else {
452 			/*
453 			 * The last thread is exiting, but not through exit().
454 			 */
455 			panic("thread_exit: Last thread exiting on its own");
456 		}
457 	}
458 #ifdef	HWPMC_HOOKS
459 	/*
460 	 * If this thread is part of a process that is being tracked by hwpmc(4),
461 	 * inform the module of the thread's impending exit.
462 	 */
463 	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
464 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
465 #endif
466 	PROC_UNLOCK(p);
467 
468 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
469 	new_switchtime = cpu_ticks();
470 	runtime = new_switchtime - PCPU_GET(switchtime);
471 	td->td_runtime += runtime;
472 	td->td_incruntime += runtime;
473 	PCPU_SET(switchtime, new_switchtime);
474 	PCPU_SET(switchticks, ticks);
475 	PCPU_INC(cnt.v_swtch);
476 
477 	/* Save our resource usage in our process. */
478 	td->td_ru.ru_nvcsw++;
479 	ruxagg(p, td);
480 	rucollect(&p->p_ru, &td->td_ru);
481 
482 	thread_lock(td);
483 	PROC_SUNLOCK(p);
484 	td->td_state = TDS_INACTIVE;
485 #ifdef WITNESS
486 	witness_thread_exit(td);
487 #endif
488 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
489 	sched_throw(td);
490 	panic("I'm a teapot!");
491 	/* NOTREACHED */
492 }
493 
494 /*
495  * Do any thread-specific cleanups that may be needed in wait().
496  * Called with Giant, proc and schedlock not held.
497  */
498 void
499 thread_wait(struct proc *p)
500 {
501 	struct thread *td;
502 
503 	mtx_assert(&Giant, MA_NOTOWNED);
504 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
505 	td = FIRST_THREAD_IN_PROC(p);
506 	/* Lock the last thread so we spin until it exits cpu_throw(). */
507 	thread_lock(td);
508 	thread_unlock(td);
509 	/* Wait for any remaining threads to exit cpu_throw(). */
510 	while (p->p_exitthreads)
511 		sched_relinquish(curthread);
512 	lock_profile_thread_exit(td);
513 	cpuset_rel(td->td_cpuset);
514 	td->td_cpuset = NULL;
515 	cpu_thread_clean(td);
516 	crfree(td->td_ucred);
517 	thread_reap();	/* check for zombie threads etc. */
518 }
519 
520 /*
521  * Link a thread to a process.
522  * Set up anything that needs to be initialized for it to
523  * be used by the process.
524  */
525 void
526 thread_link(struct thread *td, struct proc *p)
527 {
528 
529 	/*
530 	 * XXX This can't be enabled because it's called for proc0 before
531 	 * its lock has been created.
532 	 * PROC_LOCK_ASSERT(p, MA_OWNED);
533 	 */
534 	td->td_state    = TDS_INACTIVE;
535 	td->td_proc     = p;
536 	td->td_flags    = TDF_INMEM;
537 
538 	LIST_INIT(&td->td_contested);
539 	LIST_INIT(&td->td_lprof[0]);
540 	LIST_INIT(&td->td_lprof[1]);
541 	sigqueue_init(&td->td_sigqueue, p);
542 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
543 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
544 	p->p_numthreads++;
545 }
546 
547 /*
548  * Convert a process with one thread to an unthreaded process.
549  */
550 void
551 thread_unthread(struct thread *td)
552 {
553 	struct proc *p = td->td_proc;
554 
555 	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
556 	p->p_flag &= ~P_HADTHREADS;
557 }
558 
559 /*
560  * Called from:
561  *  thread_exit()
562  */
563 void
564 thread_unlink(struct thread *td)
565 {
566 	struct proc *p = td->td_proc;
567 
568 	PROC_LOCK_ASSERT(p, MA_OWNED);
569 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
570 	p->p_numthreads--;
571 	/* could clear a few other things here */
572 	/* Must NOT clear links to proc! */
573 }
574 
575 static int
576 calc_remaining(struct proc *p, int mode)
577 {
578 	int remaining;
579 
580 	PROC_LOCK_ASSERT(p, MA_OWNED);
581 	PROC_SLOCK_ASSERT(p, MA_OWNED);
582 	if (mode == SINGLE_EXIT)
583 		remaining = p->p_numthreads;
584 	else if (mode == SINGLE_BOUNDARY)
585 		remaining = p->p_numthreads - p->p_boundary_count;
586 	else if (mode == SINGLE_NO_EXIT)
587 		remaining = p->p_numthreads - p->p_suspcount;
588 	else
589 		panic("calc_remaining: wrong mode %d", mode);
590 	return (remaining);
591 }
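
/*
 * Illustrative reading of calc_remaining(): "remaining" is the number of
 * threads still standing in the way of the single-threading request.  With
 * SINGLE_EXIT every other thread has to exit, so all of them count; with
 * SINGLE_BOUNDARY threads already parked at the user boundary
 * (p_boundary_count) no longer count; with SINGLE_NO_EXIT any suspended
 * thread (p_suspcount) no longer counts.  thread_single() below loops until
 * this value reaches 1, i.e. only the requesting thread is left.
 */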
592 
593 /*
594  * Enforce single-threading.
595  *
596  * Returns 1 if the caller must abort (another thread is waiting to
597  * exit the process or similar). Process is locked!
598  * Returns 0 when you are successfully the only thread running.
599  * A process has successfully single threaded in the suspend mode when
600  * there are no threads in user mode. Threads in the kernel must be
601  * allowed to continue until they get to the user boundary. They may even
602  * copy out their return values and data before suspending. They may,
603  * however, be accelerated in reaching the user boundary, as we will wake
604  * up any sleeping threads that are interruptible (PCATCH).
605  */
606 int
607 thread_single(int mode)
608 {
609 	struct thread *td;
610 	struct thread *td2;
611 	struct proc *p;
612 	int remaining, wakeup_swapper;
613 
614 	td = curthread;
615 	p = td->td_proc;
616 	mtx_assert(&Giant, MA_NOTOWNED);
617 	PROC_LOCK_ASSERT(p, MA_OWNED);
618 	KASSERT((td != NULL), ("curthread is NULL"));
619 
620 	if ((p->p_flag & P_HADTHREADS) == 0)
621 		return (0);
622 
623 	/* Is someone already single threading? */
624 	if (p->p_singlethread != NULL && p->p_singlethread != td)
625 		return (1);
626 
627 	if (mode == SINGLE_EXIT) {
628 		p->p_flag |= P_SINGLE_EXIT;
629 		p->p_flag &= ~P_SINGLE_BOUNDARY;
630 	} else {
631 		p->p_flag &= ~P_SINGLE_EXIT;
632 		if (mode == SINGLE_BOUNDARY)
633 			p->p_flag |= P_SINGLE_BOUNDARY;
634 		else
635 			p->p_flag &= ~P_SINGLE_BOUNDARY;
636 	}
637 	p->p_flag |= P_STOPPED_SINGLE;
638 	PROC_SLOCK(p);
639 	p->p_singlethread = td;
640 	remaining = calc_remaining(p, mode);
641 	while (remaining != 1) {
642 		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
643 			goto stopme;
644 		wakeup_swapper = 0;
645 		FOREACH_THREAD_IN_PROC(p, td2) {
646 			if (td2 == td)
647 				continue;
648 			thread_lock(td2);
649 			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
650 			if (TD_IS_INHIBITED(td2)) {
651 				switch (mode) {
652 				case SINGLE_EXIT:
653 					if (TD_IS_SUSPENDED(td2))
654 						wakeup_swapper |=
655 						    thread_unsuspend_one(td2);
656 					if (TD_ON_SLEEPQ(td2) &&
657 					    (td2->td_flags & TDF_SINTR))
658 						wakeup_swapper |=
659 						    sleepq_abort(td2, EINTR);
660 					break;
661 				case SINGLE_BOUNDARY:
662 					if (TD_IS_SUSPENDED(td2) &&
663 					    !(td2->td_flags & TDF_BOUNDARY))
664 						wakeup_swapper |=
665 						    thread_unsuspend_one(td2);
666 					if (TD_ON_SLEEPQ(td2) &&
667 					    (td2->td_flags & TDF_SINTR))
668 						wakeup_swapper |=
669 						    sleepq_abort(td2, ERESTART);
670 					break;
671 				case SINGLE_NO_EXIT:
672 					if (TD_IS_SUSPENDED(td2) &&
673 					    !(td2->td_flags & TDF_BOUNDARY))
674 						wakeup_swapper |=
675 						    thread_unsuspend_one(td2);
676 					if (TD_ON_SLEEPQ(td2) &&
677 					    (td2->td_flags & TDF_SINTR))
678 						wakeup_swapper |=
679 						    sleepq_abort(td2, ERESTART);
680 					break;
681 				default:
682 					break;
683 				}
684 			}
685 #ifdef SMP
686 			else if (TD_IS_RUNNING(td2) && td != td2) {
687 				forward_signal(td2);
688 			}
689 #endif
690 			thread_unlock(td2);
691 		}
692 		if (wakeup_swapper)
693 			kick_proc0();
694 		remaining = calc_remaining(p, mode);
695 
696 		/*
697 		 * Maybe we suspended some threads; was it enough?
698 		 */
699 		if (remaining == 1)
700 			break;
701 
702 stopme:
703 		/*
704 		 * Wake us up when everyone else has suspended.
705 		 * In the mean time we suspend as well.
706 		 */
707 		thread_suspend_switch(td);
708 		remaining = calc_remaining(p, mode);
709 	}
710 	if (mode == SINGLE_EXIT) {
711 		/*
712 		 * We have gotten rid of all the other threads and we
713 		 * are about to either exit or exec. In either case,
714 		 * we try our utmost to revert to being a non-threaded
715 		 * process.
716 		 */
717 		p->p_singlethread = NULL;
718 		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
719 		thread_unthread(td);
720 	}
721 	PROC_SUNLOCK(p);
722 	return (0);
723 }
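
/*
 * Usage sketch for thread_single() (illustrative; modelled loosely on what
 * an exit- or exec-like path might do, not a verbatim caller):
 *
 *	PROC_LOCK(p);
 *	if ((p->p_flag & P_HADTHREADS) != 0 &&
 *	    thread_single(SINGLE_EXIT) != 0) {
 *		...another thread is already single-threading; abort...
 *	}
 *	...from here on this is the only thread running in the process...
 *	PROC_UNLOCK(p);
 *
 * SINGLE_BOUNDARY and SINGLE_NO_EXIT requests are temporary and are undone
 * later with thread_single_end().
 */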
724 
725 /*
726  * Called in from locations that can safely check to see
727  * whether we have to suspend or at least throttle for a
728  * single-thread event (e.g. fork).
729  *
730  * Such locations include userret().
731  * If the "return_instead" argument is non-zero, the thread must be able to
732  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
733  *
734  * The 'return_instead' argument tells the function if it may do a
735  * thread_exit() or suspend, or whether the caller must abort and back
736  * out instead.
737  *
738  * If the thread that set the single_threading request has set the
739  * P_SINGLE_EXIT bit in the process flags then this call will never return
740  * if 'return_instead' is false, but will exit.
741  *
742  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
743  *---------------+--------------------+---------------------
744  *       0       | returns 0          |   returns 0 or 1
745  *               | when ST ends       |   immediately
746  *---------------+--------------------+---------------------
747  *       1       | thread exits       |   returns 1
748  *               |                    |  immediately
749  * 0 = thread_exit() or suspension ok,
750  * other = return error instead of stopping the thread.
751  *
752  * While a full suspension is in effect, even a single-threading
753  * thread would be suspended if it made this call (but it shouldn't).
754  * This call should only be made from places where
755  * thread_exit() would be safe as that may be the outcome unless
756  * return_instead is set.
757  */
758 int
759 thread_suspend_check(int return_instead)
760 {
761 	struct thread *td;
762 	struct proc *p;
763 	int wakeup_swapper;
764 
765 	td = curthread;
766 	p = td->td_proc;
767 	mtx_assert(&Giant, MA_NOTOWNED);
768 	PROC_LOCK_ASSERT(p, MA_OWNED);
769 	while (P_SHOULDSTOP(p) ||
770 	      ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
771 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
772 			KASSERT(p->p_singlethread != NULL,
773 			    ("singlethread not set"));
774 			/*
775 			 * The only suspension in effect is single-threading,
776 			 * and the single-threading thread need not stop.
777 			 * XXX Should be safe to access unlocked
778 			 * as it can only be set to be true by us.
779 			 */
780 			if (p->p_singlethread == td)
781 				return (0);	/* Exempt from stopping. */
782 		}
783 		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
784 			return (EINTR);
785 
786 		/* Should we goto user boundary if we didn't come from there? */
787 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
788 		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
789 			return (ERESTART);
790 
791 		/*
792 		 * If the process is waiting for us to exit,
793 		 * this thread should just suicide.
794 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
795 		 */
796 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
797 			PROC_UNLOCK(p);
798 			tidhash_remove(td);
799 			PROC_LOCK(p);
800 			tdsigcleanup(td);
801 			PROC_SLOCK(p);
802 			thread_stopped(p);
803 			thread_exit();
804 		}
805 
806 		PROC_SLOCK(p);
807 		thread_stopped(p);
808 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
809 			if (p->p_numthreads == p->p_suspcount + 1) {
810 				thread_lock(p->p_singlethread);
811 				wakeup_swapper =
812 				    thread_unsuspend_one(p->p_singlethread);
813 				thread_unlock(p->p_singlethread);
814 				if (wakeup_swapper)
815 					kick_proc0();
816 			}
817 		}
818 		PROC_UNLOCK(p);
819 		thread_lock(td);
820 		/*
821 		 * When a thread suspends, it just
822 		 * gets taken off all queues.
823 		 */
824 		thread_suspend_one(td);
825 		if (return_instead == 0) {
826 			p->p_boundary_count++;
827 			td->td_flags |= TDF_BOUNDARY;
828 		}
829 		PROC_SUNLOCK(p);
830 		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
831 		if (return_instead == 0)
832 			td->td_flags &= ~TDF_BOUNDARY;
833 		thread_unlock(td);
834 		PROC_LOCK(p);
835 		if (return_instead == 0) {
836 			PROC_SLOCK(p);
837 			p->p_boundary_count--;
838 			PROC_SUNLOCK(p);
839 		}
840 	}
841 	return (0);
842 }
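
/*
 * Usage sketch for thread_suspend_check() (illustrative, hypothetical
 * callers).  A path that holds nothing it cannot give up, such as the
 * return-to-user path, lets the suspension (or exit) happen in place:
 *
 *	PROC_LOCK(p);
 *	thread_suspend_check(0);
 *	PROC_UNLOCK(p);
 *
 * while a path that must first unwind its own state asks for an error and
 * backs out instead:
 *
 *	PROC_LOCK(p);
 *	error = thread_suspend_check(1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (error);
 *
 * where the non-zero results are EINTR (single-threading for exit) or
 * ERESTART (stop at the user boundary), as returned above.
 */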
843 
844 void
845 thread_suspend_switch(struct thread *td)
846 {
847 	struct proc *p;
848 
849 	p = td->td_proc;
850 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
851 	PROC_LOCK_ASSERT(p, MA_OWNED);
852 	PROC_SLOCK_ASSERT(p, MA_OWNED);
853 	/*
854 	 * We implement thread_suspend_one in stages here to avoid
855 	 * dropping the proc lock while the thread lock is owned.
856 	 */
857 	thread_stopped(p);
858 	p->p_suspcount++;
859 	PROC_UNLOCK(p);
860 	thread_lock(td);
861 	td->td_flags &= ~TDF_NEEDSUSPCHK;
862 	TD_SET_SUSPENDED(td);
863 	sched_sleep(td, 0);
864 	PROC_SUNLOCK(p);
865 	DROP_GIANT();
866 	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
867 	thread_unlock(td);
868 	PICKUP_GIANT();
869 	PROC_LOCK(p);
870 	PROC_SLOCK(p);
871 }
872 
873 void
874 thread_suspend_one(struct thread *td)
875 {
876 	struct proc *p = td->td_proc;
877 
878 	PROC_SLOCK_ASSERT(p, MA_OWNED);
879 	THREAD_LOCK_ASSERT(td, MA_OWNED);
880 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
881 	p->p_suspcount++;
882 	td->td_flags &= ~TDF_NEEDSUSPCHK;
883 	TD_SET_SUSPENDED(td);
884 	sched_sleep(td, 0);
885 }
886 
887 int
888 thread_unsuspend_one(struct thread *td)
889 {
890 	struct proc *p = td->td_proc;
891 
892 	PROC_SLOCK_ASSERT(p, MA_OWNED);
893 	THREAD_LOCK_ASSERT(td, MA_OWNED);
894 	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
895 	TD_CLR_SUSPENDED(td);
896 	p->p_suspcount--;
897 	return (setrunnable(td));
898 }
899 
900 /*
901  * Allow all threads blocked by single threading to continue running.
902  */
903 void
904 thread_unsuspend(struct proc *p)
905 {
906 	struct thread *td;
907 	int wakeup_swapper;
908 
909 	PROC_LOCK_ASSERT(p, MA_OWNED);
910 	PROC_SLOCK_ASSERT(p, MA_OWNED);
911 	wakeup_swapper = 0;
912 	if (!P_SHOULDSTOP(p)) {
913                 FOREACH_THREAD_IN_PROC(p, td) {
914 			thread_lock(td);
915 			if (TD_IS_SUSPENDED(td)) {
916 				wakeup_swapper |= thread_unsuspend_one(td);
917 			}
918 			thread_unlock(td);
919 		}
920 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
921 	    (p->p_numthreads == p->p_suspcount)) {
922 		/*
923 		 * Stopping everything also did the job for the single
924 		 * Stopping everything also did the job for the single
925 		 * threading request. Now that we've downgraded to
926 		 * single-threaded, let it continue.
927 		thread_lock(p->p_singlethread);
928 		wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
929 		thread_unlock(p->p_singlethread);
930 	}
931 	if (wakeup_swapper)
932 		kick_proc0();
933 }
934 
935 /*
936  * End the single-threading mode.
937  */
938 void
939 thread_single_end(void)
940 {
941 	struct thread *td;
942 	struct proc *p;
943 	int wakeup_swapper;
944 
945 	td = curthread;
946 	p = td->td_proc;
947 	PROC_LOCK_ASSERT(p, MA_OWNED);
948 	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
949 	PROC_SLOCK(p);
950 	p->p_singlethread = NULL;
951 	wakeup_swapper = 0;
952 	/*
953 	 * If there are other threads they may now run,
954 	 * unless of course there is a blanket 'stop order'
955 	 * on the process. The single threader must be allowed
956 	 * to continue however as this is a bad place to stop.
957 	 */
958 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
959                 FOREACH_THREAD_IN_PROC(p, td) {
960 			thread_lock(td);
961 			if (TD_IS_SUSPENDED(td)) {
962 				wakeup_swapper |= thread_unsuspend_one(td);
963 			}
964 			thread_unlock(td);
965 		}
966 	}
967 	PROC_SUNLOCK(p);
968 	if (wakeup_swapper)
969 		kick_proc0();
970 }
971 
972 struct thread *
973 thread_find(struct proc *p, lwpid_t tid)
974 {
975 	struct thread *td;
976 
977 	PROC_LOCK_ASSERT(p, MA_OWNED);
978 	FOREACH_THREAD_IN_PROC(p, td) {
979 		if (td->td_tid == tid)
980 			break;
981 	}
982 	return (td);
983 }
984 
985 /* Locate a thread by number; return with proc lock held. */
986 struct thread *
987 tdfind(lwpid_t tid, pid_t pid)
988 {
989 #define RUN_THRESH	16
990 	struct thread *td;
991 	int run = 0;
992 
993 	rw_rlock(&tidhash_lock);
994 	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
995 		if (td->td_tid == tid) {
996 			if (pid != -1 && td->td_proc->p_pid != pid) {
997 				td = NULL;
998 				break;
999 			}
1000 			PROC_LOCK(td->td_proc);
1001 			if (td->td_proc->p_state == PRS_NEW) {
1002 				PROC_UNLOCK(td->td_proc);
1003 				td = NULL;
1004 				break;
1005 			}
1006 			if (run > RUN_THRESH) {
1007 				if (rw_try_upgrade(&tidhash_lock)) {
1008 					LIST_REMOVE(td, td_hash);
1009 					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
1010 						td, td_hash);
1011 					rw_wunlock(&tidhash_lock);
1012 					return (td);
1013 				}
1014 			}
1015 			break;
1016 		}
1017 		run++;
1018 	}
1019 	rw_runlock(&tidhash_lock);
1020 	return (td);
1021 }
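
/*
 * Usage sketch for tdfind() (illustrative, hypothetical caller): a lookup
 * by TID returns the thread with its process locked, and -1 may be passed
 * as the pid to match any process:
 *
 *	struct thread *ttd;
 *
 *	ttd = tdfind(tid, p->p_pid);
 *	if (ttd == NULL)
 *		return (ESRCH);
 *	...use ttd while holding its proc lock...
 *	PROC_UNLOCK(ttd->td_proc);
 *
 * ESRCH here is just one hypothetical way for the caller to report a
 * missing thread.
 */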
1022 
1023 void
1024 tidhash_add(struct thread *td)
1025 {
1026 	rw_wlock(&tidhash_lock);
1027 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
1028 	rw_wunlock(&tidhash_lock);
1029 }
1030 
1031 void
1032 tidhash_remove(struct thread *td)
1033 {
1034 	rw_wlock(&tidhash_lock);
1035 	LIST_REMOVE(td, td_hash);
1036 	rw_wunlock(&tidhash_lock);
1037 }
1038