/* xref: /freebsd/sys/kern/kern_thread.c (revision fdcac92868ae2506749bb4edc925e754f2118006) */
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/turnstile.h>
#include <sys/user.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
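/*
 * RANGEOF() yields the byte span between two members of a structure,
 * which is how the per-structure "zero" sections are cleared later in
 * this file, e.g.:
 *
 *	bzero(&kg->kg_startzero,
 *	    RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 */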

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
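/*
 * The zombie lists are protected by a spin mutex, presumably because
 * the stash functions below may run in contexts (e.g. during thread
 * exit, with the spin sched_lock held) where sleeping on a default
 * mutex would be unsafe.
 */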

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
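/*
 * The knob registered above is reachable from userland as
 * kern.threads.virtual_cpu, e.g.:
 *
 *	# sysctl kern.threads.virtual_cpu=4
 *
 * A value of 0 (the default) makes the handler report the real cpu
 * count.
 */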

/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts, and the parts are
 * linked together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	int		bmp_base;
	int		bmp_free;
};
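/*
 * A minimal sketch (not compiled in) of how a tid maps onto a bitmap
 * part, following the macros above: the part is found by its bmp_base
 * range, and the remainder selects a word and a bit.
 */
#if 0
	/* Given a tid and the bitmap part that covers it: */
	int off = tid - bmp->bmp_base;		/* offset within the part */
	int idx = off / TID_IDS_PER_IDX;	/* which bitmap word */
	int bit = off % TID_IDS_PER_IDX;	/* which bit in that word */
	/* A set bit (1) means free, clear (0) means assigned; see below. */
#endif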

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_tid = 0;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	tid_bitmap_word bit;
	int idx, tid;

	td = (struct thread *)mem;

	if (td->td_tid > PID_MAX) {
		STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
			if (td->td_tid >= bmp->bmp_base &&
			    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
				break;
		}
		KASSERT(bmp != NULL, ("No TID bitmap?"));
		mtx_lock(&tid_lock);
		tid = td->td_tid - bmp->bmp_base;
		idx = tid / TID_IDS_PER_IDX;
		/* Use a full-width word; an int would truncate bits >= 31. */
		bit = 1UL << (tid % TID_IDS_PER_IDX);
		bmp->bmp_bitmap[idx] |= bit;
		bmp->bmp_free++;
		mtx_unlock(&tid_lock);
	}

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its kse group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE group.
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = NULL;
	}
}
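/*
 * Lifecycle of a kse_upcall: upcall_alloc() -> upcall_link() (owned by
 * an upcall thread) -> upcall_unlink() -> upcall_stash(); stashed
 * upcalls are finally freed by thread_reap() below.
 */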

/*
 * For a newly created process,
 * link up all the structures and its initial thread, etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

#ifndef _SYS_SYSPROTO_H_
struct kse_switchin_args {
	const struct __mcontext *mcp;
	long val;
	long *loc;
};
#endif

int
kse_switchin(struct thread *td, struct kse_switchin_args *uap)
{
	mcontext_t mc;
	int error;

	error = (uap->mcp == NULL) ? EINVAL : 0;
	if (!error)
		error = copyin(uap->mcp, &mc, sizeof(mc));
	if (!error && uap->loc != NULL)
		error = (suword(uap->loc, uap->val) != 0) ? EINVAL : 0;
	if (!error)
		error = set_mcontext(td, &mc);
	return ((error == 0) ? EJUSTRETURN : error);
}
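/*
 * Note on the return value above: on success kse_switchin() returns
 * EJUSTRETURN, which tells the syscall layer not to rewrite the return
 * registers, since set_mcontext() has already installed the new user
 * context in the trapframe.
 */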

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox *tmbx;
	int cmd;
	long data;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;

	if (!(p->p_flag & P_SA))
		return (EINVAL);

	switch (uap->cmd) {
	case KSE_INTR_SENDSIG:
		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		/* FALLTHROUGH */
	case KSE_INTR_INTERRUPT:
	case KSE_INTR_RESTART:
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2->td_mailbox == uap->tmbx)
				break;
		}
		if (td2 == NULL) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (ESRCH);
		}
		if (uap->cmd == KSE_INTR_SENDSIG) {
			if (uap->data > 0) {
				td2->td_flags &= ~TDF_INTERRUPT;
				mtx_unlock_spin(&sched_lock);
				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
			} else {
				mtx_unlock_spin(&sched_lock);
			}
		} else {
			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
			if (TD_CAN_UNBIND(td2))
				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
			if (uap->cmd == KSE_INTR_INTERRUPT)
				td2->td_intrval = EINTR;
			else
				td2->td_intrval = ERESTART;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
				sleepq_abort(td2);
			mtx_unlock_spin(&sched_lock);
		}
		PROC_UNLOCK(p);
		break;
	case KSE_INTR_SIGEXIT:
		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		PROC_LOCK(p);
		sigexit(td, (int)uap->data);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}
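/*
 * A hypothetical UTS-side call, sketched from the argument structure
 * above: interrupt a blocked user thread identified by its mailbox so
 * that its current sleep is aborted with EINTR.
 *
 *	kse_thr_interrupt(tmbx, KSE_INTR_INTERRUPT, 0);
 */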

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_SA;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct timespec timeout;
	struct timeval tv;
	sigset_t sigset;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
	if (td->td_flags & TDF_SA)
		td->td_pflags |= TDP_UPCALLING;
	else {
		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
		if (ku->ku_mflags == -1) {
			PROC_LOCK(p);
			sigexit(td, SIGSEGV);
		}
	}
	PROC_LOCK(p);
	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
		/* UTS wants to wait for signal event */
		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
		p->p_flag &= ~P_SIGEVENT;
		sigset = p->p_siglist;
		PROC_UNLOCK(p);
		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
		    sizeof(sigset));
	} else {
		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
			kg->kg_upsleeps++;
			error = msleep(&kg->kg_completed, &p->p_mtx,
				PPAUSE|PCATCH, "kserel",
				(uap->timeout ? tvtohz(&tv) : 0));
			kg->kg_upsleeps--;
		}
		PROC_UNLOCK(p);
	}
	if (ku->ku_flags & KUF_DOUPCALL) {
		mtx_lock_spin(&sched_lock);
		ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_SA))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) &&
		           ((td2->td_wchan == &kg->kg_completed) ||
			    (td2->td_wchan == &p->p_siglist &&
			     (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
			sleepq_abort(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall.
 * In all other situations, allocate the maximum number of new KSEs
 * and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus, sa = 0, first = 0;
	struct thread *newtd;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't always have a cpu counter. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (virtual_cpu != 0)
		ncpus = virtual_cpu;
	if (!(mbx.km_flags & KMF_BOUND))
		sa = TDF_SA;
	else
		ncpus = 1;
	PROC_LOCK(p);
	if (!(p->p_flag & P_SA)) {
		first = 1;
		p->p_flag |= P_SA;
	}
	PROC_UNLOCK(p);
	if (!sa && !uap->newgroup && !first)
		return (EINVAL);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race condition here, but it is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		sched_fork_ksegrp(kg, newkg);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
			return (EINVAL);
		newkg = kg;
	}

	/*
	 * Creating more upcalls than the number of physical cpus does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group.
		 *
		 * For a multiplexed group, create as many KSEs as there
		 * are physical cpus. This increases concurrency in the
		 * kernel even if userland is not MP safe and can only run
		 * on a single CPU. In an ideal world, every physical cpu
		 * should execute a thread. If there are enough KSEs,
		 * threads in the kernel can be executed in parallel on
		 * different cpus at full speed; concurrency in the kernel
		 * shouldn't be restricted by the number of upcalls userland
		 * provides. Adding more upcall structures only increases
		 * concurrency in userland.
		 *
		 * For a bound thread group, because there is only one
		 * thread in the group, we only create one KSE for the
		 * group. A thread in this kind of group will never schedule
		 * an upcall when blocked; this is intended to simulate
		 * pthread system scope threads.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			      ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			sched_fork_kse(td->td_kse, newke);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	PROC_LOCK(p);
	if (newkg->kg_numupcalls >= ncpus) {
		PROC_UNLOCK(p);
		upcall_free(newku);
		return (EPROCLIM);
	}
	if (first && sa) {
		SIGSETOR(p->p_siglist, td->td_siglist);
		SIGEMPTYSET(td->td_siglist);
		SIGFILLSET(td->td_sigmask);
		SIG_CANTMASK(td->td_sigmask);
	}
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	upcall_link(newku, newkg);
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread; find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread yet,
		 * create an initial upcall thread to own it.
		 */
		newtd = thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread doesn't have an upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
			newtd = td;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			newtd = thread_schedule_upcall(td, newku);
		}
	}
	if (!sa) {
		newtd->td_mailbox = mbx.km_curthread;
		newtd->td_flags &= ~TDF_SA;
		if (newtd != td) {
			mtx_unlock_spin(&sched_lock);
			cpu_set_upcall_kse(newtd, newku);
			mtx_lock_spin(&sched_lock);
		}
	} else {
		newtd->td_flags |= TDF_SA;
	}
	if (newtd != td)
		setrunqueue(newtd);
	mtx_unlock_spin(&sched_lock);
	return (0);
}
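/*
 * A hypothetical userland sketch of starting KSE mode, based only on
 * the mailbox fields consumed above (km_flags, km_func, km_stack,
 * km_curthread, km_quantum). The identifiers uts_entry, uts_stack and
 * uts_stack_size are illustrative; any setup a real UTS needs beyond
 * this is not shown.
 */
#if 0
	struct kse_mailbox mbx;
	int error;

	bzero(&mbx, sizeof(mbx));
	mbx.km_func = uts_entry;		/* upcall entry point */
	mbx.km_stack.ss_sp = uts_stack;		/* upcall stack */
	mbx.km_stack.ss_size = uts_stack_size;
	/* Leaving KMF_BOUND clear requests a multiplexed (SA) group. */
	error = kse_create(&mbx, 0);		/* 0: join current ksegrp */
#endif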

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}
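/*
 * Note on the zone sizes above: sched_sizeof_thread() and friends
 * reserve room for scheduler-private state directly behind each
 * object, which is what lets thread_init(), kse_init() and
 * ksegrp_init() point td_sched/ke_sched/kg_sched at &obj[1].
 */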

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */
void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

105244990b8cSJulian Elischer /*
10534f0db5e0SJulian Elischer  * Deallocate a ksegrp.
10544f0db5e0SJulian Elischer  */
10554f0db5e0SJulian Elischer void
10564f0db5e0SJulian Elischer ksegrp_free(struct ksegrp *td)
10574f0db5e0SJulian Elischer {
10584f0db5e0SJulian Elischer 	uma_zfree(ksegrp_zone, td);
10594f0db5e0SJulian Elischer }
10604f0db5e0SJulian Elischer 
10614f0db5e0SJulian Elischer /*
10624f0db5e0SJulian Elischer  * Deallocate a kse.
10634f0db5e0SJulian Elischer  */
10644f0db5e0SJulian Elischer void
10654f0db5e0SJulian Elischer kse_free(struct kse *td)
10664f0db5e0SJulian Elischer {
10674f0db5e0SJulian Elischer 	uma_zfree(kse_zone, td);
10684f0db5e0SJulian Elischer }
10694f0db5e0SJulian Elischer 
10704f0db5e0SJulian Elischer /*
107144990b8cSJulian Elischer  * Deallocate a thread.
107244990b8cSJulian Elischer  */
107344990b8cSJulian Elischer void
107444990b8cSJulian Elischer thread_free(struct thread *td)
107544990b8cSJulian Elischer {
1076696058c3SJulian Elischer 
1077696058c3SJulian Elischer 	cpu_thread_clean(td);
107844990b8cSJulian Elischer 	uma_zfree(thread_zone, td);
107944990b8cSJulian Elischer }
108044990b8cSJulian Elischer 
108144990b8cSJulian Elischer /*
1082fdcac928SMarcel Moolenaar  * Assign a thread ID.
1083fdcac928SMarcel Moolenaar  */
1084fdcac928SMarcel Moolenaar int
1085fdcac928SMarcel Moolenaar thread_new_tid(void)
1086fdcac928SMarcel Moolenaar {
1087fdcac928SMarcel Moolenaar 	struct tid_bitmap_part *bmp, *new;
1088fdcac928SMarcel Moolenaar 	int bit, idx, tid;
1089fdcac928SMarcel Moolenaar 
1090fdcac928SMarcel Moolenaar 	mtx_lock(&tid_lock);
1091fdcac928SMarcel Moolenaar 	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
1092fdcac928SMarcel Moolenaar 		if (bmp->bmp_free)
1093fdcac928SMarcel Moolenaar 			break;
1094fdcac928SMarcel Moolenaar 	}
1095fdcac928SMarcel Moolenaar 	/* Create a new bitmap if we run out of free bits. */
1096fdcac928SMarcel Moolenaar 	if (bmp == NULL) {
1097fdcac928SMarcel Moolenaar 		mtx_unlock(&tid_lock);
1098fdcac928SMarcel Moolenaar 		new = uma_zalloc(tid_zone, M_WAITOK);
1099fdcac928SMarcel Moolenaar 		mtx_lock(&tid_lock);
1100fdcac928SMarcel Moolenaar 		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
1101fdcac928SMarcel Moolenaar 		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
1102fdcac928SMarcel Moolenaar 			/* 1=free, 0=assigned. This way we can use ffsl(). */
1103fdcac928SMarcel Moolenaar 			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
1104fdcac928SMarcel Moolenaar 			new->bmp_base = (bmp == NULL) ? TID_MIN :
1105fdcac928SMarcel Moolenaar 			    bmp->bmp_base + TID_IDS_PER_PART;
1106fdcac928SMarcel Moolenaar 			new->bmp_free = TID_IDS_PER_PART;
1107fdcac928SMarcel Moolenaar 			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
1108fdcac928SMarcel Moolenaar 			bmp = new;
1109fdcac928SMarcel Moolenaar 			new = NULL;
1110fdcac928SMarcel Moolenaar 		}
1111fdcac928SMarcel Moolenaar 	} else
1112fdcac928SMarcel Moolenaar 		new = NULL;
1113fdcac928SMarcel Moolenaar 	/* We have a bitmap with available IDs. */
1114fdcac928SMarcel Moolenaar 	idx = 0;
1115fdcac928SMarcel Moolenaar 	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
1116fdcac928SMarcel Moolenaar 		idx++;
1117fdcac928SMarcel Moolenaar 	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
1118fdcac928SMarcel Moolenaar 	tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
1119fdcac928SMarcel Moolenaar 	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
1120fdcac928SMarcel Moolenaar 	bmp->bmp_free--;
1121fdcac928SMarcel Moolenaar 	mtx_unlock(&tid_lock);
1122fdcac928SMarcel Moolenaar 
1123fdcac928SMarcel Moolenaar 	if (new != NULL)
1124fdcac928SMarcel Moolenaar 		uma_zfree(tid_zone, new);
1125fdcac928SMarcel Moolenaar 	return (tid);
1126fdcac928SMarcel Moolenaar }
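/*
 * thread_new_tid() pairs with the release path in thread_dtor() above:
 * the dtor sets the tid's bit back to 1 (free) in its bitmap part.
 * The preallocation dance with "new" exists so the zone allocation can
 * sleep outside tid_lock; the extra part is freed again if another
 * part turned out to have enough room.
 */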
1127fdcac928SMarcel Moolenaar 
1128fdcac928SMarcel Moolenaar /*
112944990b8cSJulian Elischer  * Store the thread context in the UTS's mailbox.
11303d0586d4SJulian Elischer  * then add the mailbox at the head of a list we are building in user space.
11313d0586d4SJulian Elischer  * The list is anchored in the ksegrp structure.
113244990b8cSJulian Elischer  */
113344990b8cSJulian Elischer int
1134dd7da9aaSDavid Xu thread_export_context(struct thread *td, int willexit)
113544990b8cSJulian Elischer {
11360d294460SJuli Mallett 	struct proc *p;
11373d0586d4SJulian Elischer 	struct ksegrp *kg;
11383d0586d4SJulian Elischer 	uintptr_t mbx;
11393d0586d4SJulian Elischer 	void *addr;
11409dde3bc9SDavid Xu 	int error = 0, temp, sig;
11412b035cbeSJulian Elischer 	mcontext_t mc;
114244990b8cSJulian Elischer 
11430d294460SJuli Mallett 	p = td->td_proc;
11440d294460SJuli Mallett 	kg = td->td_ksegrp;
11450d294460SJuli Mallett 
1146c76e33b6SJonathan Mini 	/* Export the user/machine context. */
11472b035cbeSJulian Elischer 	get_mcontext(td, &mc, 0);
11482b035cbeSJulian Elischer 	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
11492b035cbeSJulian Elischer 	error = copyout(&mc, addr, sizeof(mcontext_t));
115093a7aa79SJulian Elischer 	if (error)
115193a7aa79SJulian Elischer 		goto bad;
115244990b8cSJulian Elischer 
11535215b187SJeff Roberson 	/* Export the clock ticks spent in kernel mode. */
11545215b187SJeff Roberson 	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
11554e4422d4SMarcel Moolenaar 	temp = fuword32(addr) + td->td_usticks;
11564e4422d4SMarcel Moolenaar 	if (suword32(addr, temp)) {
11572b035cbeSJulian Elischer 		error = EFAULT;
11585215b187SJeff Roberson 		goto bad;
11592b035cbeSJulian Elischer 	}
11605215b187SJeff Roberson 
11619dde3bc9SDavid Xu 	/*
11629dde3bc9SDavid Xu 	 * Post a synchronous signal, or process SIGKILL and SIGSTOP.
11639dde3bc9SDavid Xu 	 * A synchronous signal is only possible here when it is not
11649dde3bc9SDavid Xu 	 * caught by userland or the process is being debugged.
11659dde3bc9SDavid Xu 	 */
1166dd7da9aaSDavid Xu 	PROC_LOCK(p);
11679dde3bc9SDavid Xu 	if (td->td_flags & TDF_NEEDSIGCHK) {
11689dde3bc9SDavid Xu 		mtx_lock_spin(&sched_lock);
11699dde3bc9SDavid Xu 		td->td_flags &= ~TDF_NEEDSIGCHK;
11709dde3bc9SDavid Xu 		mtx_unlock_spin(&sched_lock);
11719dde3bc9SDavid Xu 		mtx_lock(&p->p_sigacts->ps_mtx);
11729dde3bc9SDavid Xu 		while ((sig = cursig(td)) != 0)
11739dde3bc9SDavid Xu 			postsig(sig);
11749dde3bc9SDavid Xu 		mtx_unlock(&p->p_sigacts->ps_mtx);
11759dde3bc9SDavid Xu 	}
1176dd7da9aaSDavid Xu 	if (willexit)
1177dd7da9aaSDavid Xu 		SIGFILLSET(td->td_sigmask);
1178dd7da9aaSDavid Xu 	PROC_UNLOCK(p);
11799dde3bc9SDavid Xu 
11805215b187SJeff Roberson 	/* Get the address of the list pointer in the latest mailbox. */
11813d0586d4SJulian Elischer 	addr = (void *)(&td->td_mailbox->tm_next);
11823d0586d4SJulian Elischer 	/*
11833d0586d4SJulian Elischer 	 * Put the saved address of the previous first
11843d0586d4SJulian Elischer 	 * entry into this one
11853d0586d4SJulian Elischer 	 */
11863d0586d4SJulian Elischer 	for (;;) {
11873d0586d4SJulian Elischer 		mbx = (uintptr_t)kg->kg_completed;
11883d0586d4SJulian Elischer 		if (suword(addr, mbx)) {
118993a7aa79SJulian Elischer 			error = EFAULT;
11908798d4f9SDavid Xu 			goto bad;
11913d0586d4SJulian Elischer 		}
11920cd3964fSJulian Elischer 		PROC_LOCK(p);
11933d0586d4SJulian Elischer 		if (mbx == (uintptr_t)kg->kg_completed) {
11943d0586d4SJulian Elischer 			kg->kg_completed = td->td_mailbox;
11955215b187SJeff Roberson 			/*
11965215b187SJeff Roberson 			 * The thread context may be taken away by
11975215b187SJeff Roberson 			 * other upcall threads when we unlock the
11985215b187SJeff Roberson 			 * process lock, so it is no longer valid
11995215b187SJeff Roberson 			 * to use it anywhere else.
12005215b187SJeff Roberson 			 */
12015215b187SJeff Roberson 			td->td_mailbox = NULL;
12020cd3964fSJulian Elischer 			PROC_UNLOCK(p);
12033d0586d4SJulian Elischer 			break;
12043d0586d4SJulian Elischer 		}
12050cd3964fSJulian Elischer 		PROC_UNLOCK(p);
12063d0586d4SJulian Elischer 	}
12075215b187SJeff Roberson 	td->td_usticks = 0;
12083d0586d4SJulian Elischer 	return (0);
12098798d4f9SDavid Xu 
12108798d4f9SDavid Xu bad:
12118798d4f9SDavid Xu 	PROC_LOCK(p);
1212dd7da9aaSDavid Xu 	sigexit(td, SIGILL);
121393a7aa79SJulian Elischer 	return (error);
12143d0586d4SJulian Elischer }
121544990b8cSJulian Elischer 
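/*
 * A note on the publish loop above (editor's sketch): the completed-
 * mailbox list head lives in the kernel (kg_completed) but the link
 * field lives in user space, so the push uses optimistic retry rather
 * than holding a lock across the copyout:
 *
 *	1. snapshot  mbx = kg->kg_completed;
 *	2. suword() the snapshot into the new mailbox's tm_next
 *	   (this can fault and sleep, so no locks may be held);
 *	3. retake the proc lock and commit the new head only if
 *	   kg_completed still equals the snapshot, else retry.
 */
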
12163d0586d4SJulian Elischer /*
12173d0586d4SJulian Elischer  * Take the list of completed mailboxes for this KSEGRP and put them on this
12185215b187SJeff Roberson  * upcall's mailbox as it's the next one going up.
12193d0586d4SJulian Elischer  */
12203d0586d4SJulian Elischer static int
12215215b187SJeff Roberson thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
12223d0586d4SJulian Elischer {
12230cd3964fSJulian Elischer 	struct proc *p = kg->kg_proc;
12243d0586d4SJulian Elischer 	void *addr;
12253d0586d4SJulian Elischer 	uintptr_t mbx;
12263d0586d4SJulian Elischer 
12275215b187SJeff Roberson 	addr = (void *)(&ku->ku_mailbox->km_completed);
12283d0586d4SJulian Elischer 	for (;;) {
12293d0586d4SJulian Elischer 		mbx = (uintptr_t)kg->kg_completed;
12303d0586d4SJulian Elischer 		if (suword(addr, mbx)) {
12310cd3964fSJulian Elischer 			PROC_LOCK(p);
12320cd3964fSJulian Elischer 			psignal(p, SIGSEGV);
12330cd3964fSJulian Elischer 			PROC_UNLOCK(p);
12343d0586d4SJulian Elischer 			return (EFAULT);
12353d0586d4SJulian Elischer 		}
12360cd3964fSJulian Elischer 		PROC_LOCK(p);
12373d0586d4SJulian Elischer 		if (mbx == (uintptr_t)kg->kg_completed) {
12383d0586d4SJulian Elischer 			kg->kg_completed = NULL;
12390cd3964fSJulian Elischer 			PROC_UNLOCK(p);
12403d0586d4SJulian Elischer 			break;
12413d0586d4SJulian Elischer 		}
12420cd3964fSJulian Elischer 		PROC_UNLOCK(p);
12433d0586d4SJulian Elischer 	}
12443d0586d4SJulian Elischer 	return (0);
12453d0586d4SJulian Elischer }
124644990b8cSJulian Elischer 
124744990b8cSJulian Elischer /*
12488798d4f9SDavid Xu  * This function should be called at statclock interrupt time.
12498798d4f9SDavid Xu  */
12508798d4f9SDavid Xu int
12515215b187SJeff Roberson thread_statclock(int user)
12528798d4f9SDavid Xu {
12538798d4f9SDavid Xu 	struct thread *td = curthread;
1254cd4f6ebbSDavid Xu 	struct ksegrp *kg = td->td_ksegrp;
12558798d4f9SDavid Xu 
1256cd4f6ebbSDavid Xu 	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
1257cd4f6ebbSDavid Xu 		return (0);
12588798d4f9SDavid Xu 	if (user) {
12598798d4f9SDavid Xu 		/* Currently always done via ast(). */
1260b4508d7dSDavid Xu 		mtx_lock_spin(&sched_lock);
12614a338afdSJulian Elischer 		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
1262b4508d7dSDavid Xu 		mtx_unlock_spin(&sched_lock);
12635215b187SJeff Roberson 		td->td_uuticks++;
12648798d4f9SDavid Xu 	} else {
12658798d4f9SDavid Xu 		if (td->td_mailbox != NULL)
12665215b187SJeff Roberson 			td->td_usticks++;
12675215b187SJeff Roberson 		else {
12685215b187SJeff Roberson 			/* XXXKSE
12695215b187SJeff Roberson 			 * We will call thread_user_enter() for every
12705215b187SJeff Roberson 			 * kernel entry in the future, so if the thread
12715215b187SJeff Roberson 			 * mailbox is NULL it must be the UTS kernel
12725215b187SJeff Roberson 			 * thread; don't account clock ticks for it.
12735215b187SJeff Roberson 			 */
12748798d4f9SDavid Xu 		}
12755215b187SJeff Roberson 	}
12765215b187SJeff Roberson 	return (0);
12778798d4f9SDavid Xu }
12788798d4f9SDavid Xu 
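/*
 * Editor's note on the accounting split (a sketch, not authoritative):
 * td_uuticks accumulates statclock hits taken while in user mode and
 * td_usticks those taken in kernel mode.  thread_update_usr_ticks()
 * below folds each counter into the matching mailbox field, roughly
 *
 *	tm_uticks += td_uuticks;	(user time)
 *	tm_sticks += td_usticks;	(system time)
 *
 * so the UTS can observe per-thread CPU usage without a syscall.
 */
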
12795215b187SJeff Roberson /*
12804b4866edSDavid Xu  * Export stat clock ticks for userland.
12815215b187SJeff Roberson  */
12828798d4f9SDavid Xu static int
12834b4866edSDavid Xu thread_update_usr_ticks(struct thread *td, int user)
12848798d4f9SDavid Xu {
12858798d4f9SDavid Xu 	struct proc *p = td->td_proc;
12868798d4f9SDavid Xu 	struct kse_thr_mailbox *tmbx;
12875215b187SJeff Roberson 	struct kse_upcall *ku;
12886ce75196SDavid Xu 	struct ksegrp *kg;
12898798d4f9SDavid Xu 	caddr_t addr;
12908b149b51SJohn Baldwin 	u_int uticks;
12918798d4f9SDavid Xu 
12925215b187SJeff Roberson 	if ((ku = td->td_upcall) == NULL)
12935215b187SJeff Roberson 		return (-1);
12948798d4f9SDavid Xu 
12955215b187SJeff Roberson 	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
12968798d4f9SDavid Xu 	if ((tmbx == NULL) || (tmbx == (void *)-1))
12975215b187SJeff Roberson 		return (-1);
12984b4866edSDavid Xu 	if (user) {
12995215b187SJeff Roberson 		uticks = td->td_uuticks;
13005215b187SJeff Roberson 		td->td_uuticks = 0;
13015215b187SJeff Roberson 		addr = (caddr_t)&tmbx->tm_uticks;
13024b4866edSDavid Xu 	} else {
13034b4866edSDavid Xu 		uticks = td->td_usticks;
13045215b187SJeff Roberson 		td->td_usticks = 0;
13054b4866edSDavid Xu 		addr = (caddr_t)&tmbx->tm_sticks;
13064b4866edSDavid Xu 	}
13074b4866edSDavid Xu 	if (uticks) {
13084e4422d4SMarcel Moolenaar 		if (suword32(addr, uticks+fuword32(addr))) {
13095215b187SJeff Roberson 			PROC_LOCK(p);
13105215b187SJeff Roberson 			psignal(p, SIGSEGV);
13115215b187SJeff Roberson 			PROC_UNLOCK(p);
13125215b187SJeff Roberson 			return (-2);
13135215b187SJeff Roberson 		}
13144b4866edSDavid Xu 	}
13156ce75196SDavid Xu 	kg = td->td_ksegrp;
13166ce75196SDavid Xu 	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
13174b4866edSDavid Xu 		mtx_lock_spin(&sched_lock);
13184b4866edSDavid Xu 		td->td_upcall->ku_flags |= KUF_DOUPCALL;
13194b4866edSDavid Xu 		mtx_unlock_spin(&sched_lock);
13204b4866edSDavid Xu 	}
13215215b187SJeff Roberson 	return (0);
13228798d4f9SDavid Xu }
13238798d4f9SDavid Xu 
13248798d4f9SDavid Xu /*
132544990b8cSJulian Elischer  * Discard the current thread and exit from its context.
132644990b8cSJulian Elischer  *
132744990b8cSJulian Elischer  * Because we can't free a thread while we're operating under its context,
1328696058c3SJulian Elischer  * push the current thread into our CPU's deadthread holder. This means
1329696058c3SJulian Elischer  * we needn't worry about someone else grabbing our context before we
1330696058c3SJulian Elischer  * do a cpu_throw().
133144990b8cSJulian Elischer  */
133244990b8cSJulian Elischer void
133344990b8cSJulian Elischer thread_exit(void)
133444990b8cSJulian Elischer {
133544990b8cSJulian Elischer 	struct thread *td;
133644990b8cSJulian Elischer 	struct kse *ke;
133744990b8cSJulian Elischer 	struct proc *p;
133844990b8cSJulian Elischer 	struct ksegrp	*kg;
133944990b8cSJulian Elischer 
134044990b8cSJulian Elischer 	td = curthread;
134144990b8cSJulian Elischer 	kg = td->td_ksegrp;
134244990b8cSJulian Elischer 	p = td->td_proc;
134344990b8cSJulian Elischer 	ke = td->td_kse;
134444990b8cSJulian Elischer 
134544990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
134688151aa3SJulian Elischer 	KASSERT(p != NULL, ("thread exiting without a process"));
134788151aa3SJulian Elischer 	KASSERT(ke != NULL, ("thread exiting without a kse"));
134888151aa3SJulian Elischer 	KASSERT(kg != NULL, ("thread exiting without a kse group"));
134944990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
135044990b8cSJulian Elischer 	CTR1(KTR_PROC, "thread_exit: thread %p", td);
135162a0fd94SJohn Baldwin 	mtx_assert(&Giant, MA_NOTOWNED);
135244990b8cSJulian Elischer 
135348bfcdddSJulian Elischer 	if (td->td_standin != NULL) {
135448bfcdddSJulian Elischer 		thread_stash(td->td_standin);
135548bfcdddSJulian Elischer 		td->td_standin = NULL;
135648bfcdddSJulian Elischer 	}
135748bfcdddSJulian Elischer 
135844990b8cSJulian Elischer 	cpu_thread_exit(td);	/* XXXSMP */
135944990b8cSJulian Elischer 
13601faf202eSJulian Elischer 	/*
13611faf202eSJulian Elischer 	 * The last thread is left attached to the process
13621faf202eSJulian Elischer 	 * so that the whole bundle gets recycled. Skip
13631faf202eSJulian Elischer 	 * all this stuff.
13641faf202eSJulian Elischer 	 */
13651faf202eSJulian Elischer 	if (p->p_numthreads > 1) {
1366d3a0bd78SJulian Elischer 		thread_unlink(td);
13670252d203SDavid Xu 		if (p->p_maxthrwaits)
13680252d203SDavid Xu 			wakeup(&p->p_numthreads);
136944990b8cSJulian Elischer 		/*
137044990b8cSJulian Elischer 		 * The test below is NOT true if we are the
13711faf202eSJulian Elischer 		 * sole exiting thread. P_STOPPED_SINGLE is unset
137244990b8cSJulian Elischer 		 * in exit1() after it is the only survivor.
137344990b8cSJulian Elischer 		 */
13741279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
137544990b8cSJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
137671fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
137744990b8cSJulian Elischer 			}
137844990b8cSJulian Elischer 		}
137948bfcdddSJulian Elischer 
13805215b187SJeff Roberson 		/*
13815215b187SJeff Roberson 		 * Because each upcall structure has an owner thread,
13825215b187SJeff Roberson 		 * and an owner thread exits only when the process is
13835215b187SJeff Roberson 		 * exiting, no more upcalls to userland are needed and
13845215b187SJeff Roberson 		 * it is safe to delete the upcall structure here.
13855215b187SJeff Roberson 		 * Thus, when all threads in a group have exited, all
13865215b187SJeff Roberson 		 * upcalls in the group are automatically freed.
13875215b187SJeff Roberson 		 */
13885215b187SJeff Roberson 		if (td->td_upcall)
13895215b187SJeff Roberson 			upcall_remove(td);
13906f8132a8SJulian Elischer 
1391ab2baa72SDavid Xu 		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
1392ab2baa72SDavid Xu 		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
13935215b187SJeff Roberson 		ke->ke_state = KES_UNQUEUED;
13945215b187SJeff Roberson 		ke->ke_thread = NULL;
139548bfcdddSJulian Elischer 		/*
139693a7aa79SJulian Elischer 		 * Decide what to do with the KSE attached to this thread.
139748bfcdddSJulian Elischer 		 */
1398ab2baa72SDavid Xu 		if (ke->ke_flags & KEF_EXIT) {
13996f8132a8SJulian Elischer 			kse_unlink(ke);
1400ab2baa72SDavid Xu 			if (kg->kg_kses == 0) {
1401ab2baa72SDavid Xu 				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
1402ab2baa72SDavid Xu 				ksegrp_unlink(kg);
1403ab2baa72SDavid Xu 			}
1404ab2baa72SDavid Xu 		}
14055215b187SJeff Roberson 		else
14066f8132a8SJulian Elischer 			kse_reassign(ke);
14076f8132a8SJulian Elischer 		PROC_UNLOCK(p);
14085215b187SJeff Roberson 		td->td_kse	= NULL;
14095c8329edSJulian Elischer 		td->td_state	= TDS_INACTIVE;
141036f7b36fSDavid Xu #if 0
14115c8329edSJulian Elischer 		td->td_proc	= NULL;
141236f7b36fSDavid Xu #endif
14135c8329edSJulian Elischer 		td->td_ksegrp	= NULL;
14145c8329edSJulian Elischer 		td->td_last_kse	= NULL;
1415696058c3SJulian Elischer 		PCPU_SET(deadthread, td);
14161faf202eSJulian Elischer 	} else {
14171faf202eSJulian Elischer 		PROC_UNLOCK(p);
14181faf202eSJulian Elischer 	}
14194093529dSJeff Roberson 	/* XXX Shouldn't cpu_throw() here. */
1420cc66ebe2SPeter Wemm 	mtx_assert(&sched_lock, MA_OWNED);
1421cc66ebe2SPeter Wemm 	cpu_throw(td, choosethread());
1422cc66ebe2SPeter Wemm 	panic("I'm a teapot!");
142344990b8cSJulian Elischer 	/* NOTREACHED */
142444990b8cSJulian Elischer }
142544990b8cSJulian Elischer 
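/*
 * (Editor's note.) The thread left in PCPU(deadthread) above cannot
 * free itself while its stack is still in use; a later thread is
 * expected to hand it to thread_stash()/thread_reap() (defined
 * earlier in this file) once a new context is running.
 */
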
142644990b8cSJulian Elischer /*
1427696058c3SJulian Elischer  * Do any thread-specific cleanups that may be needed in wait().
142837814395SPeter Wemm  * Called with Giant, proc and sched locks not held.
1429696058c3SJulian Elischer  */
1430696058c3SJulian Elischer void
1431696058c3SJulian Elischer thread_wait(struct proc *p)
1432696058c3SJulian Elischer {
1433696058c3SJulian Elischer 	struct thread *td;
1434696058c3SJulian Elischer 
143537814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
143685495c72SJens Schweikhardt 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
143785495c72SJens Schweikhardt 	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1438696058c3SJulian Elischer 	FOREACH_THREAD_IN_PROC(p, td) {
1439696058c3SJulian Elischer 		if (td->td_standin != NULL) {
1440696058c3SJulian Elischer 			thread_free(td->td_standin);
1441696058c3SJulian Elischer 			td->td_standin = NULL;
1442696058c3SJulian Elischer 		}
1443696058c3SJulian Elischer 		cpu_thread_clean(td);
1444696058c3SJulian Elischer 	}
1445696058c3SJulian Elischer 	thread_reap();	/* check for zombie threads etc. */
1446696058c3SJulian Elischer }
1447696058c3SJulian Elischer 
1448696058c3SJulian Elischer /*
144944990b8cSJulian Elischer  * Link a thread to a process.
14501faf202eSJulian Elischer  * Set up anything that needs to be initialized for it to
14511faf202eSJulian Elischer  * be used by the process.
145244990b8cSJulian Elischer  *
145344990b8cSJulian Elischer  * Note that we do not link to the proc's ucred here.
145444990b8cSJulian Elischer  * The thread is linked as if running but no KSE assigned.
145544990b8cSJulian Elischer  */
145644990b8cSJulian Elischer void
145744990b8cSJulian Elischer thread_link(struct thread *td, struct ksegrp *kg)
145844990b8cSJulian Elischer {
145944990b8cSJulian Elischer 	struct proc *p;
146044990b8cSJulian Elischer 
146144990b8cSJulian Elischer 	p = kg->kg_proc;
146271fad9fdSJulian Elischer 	td->td_state    = TDS_INACTIVE;
146344990b8cSJulian Elischer 	td->td_proc     = p;
146444990b8cSJulian Elischer 	td->td_ksegrp   = kg;
146544990b8cSJulian Elischer 	td->td_last_kse = NULL;
14665215b187SJeff Roberson 	td->td_flags    = 0;
14675215b187SJeff Roberson 	td->td_kse      = NULL;
146844990b8cSJulian Elischer 
14691faf202eSJulian Elischer 	LIST_INIT(&td->td_contested);
1470c06eb4e2SSam Leffler 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
147144990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
147244990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
147344990b8cSJulian Elischer 	p->p_numthreads++;
147444990b8cSJulian Elischer 	kg->kg_numthreads++;
147544990b8cSJulian Elischer }
147644990b8cSJulian Elischer 
1477d3a0bd78SJulian Elischer void
1478d3a0bd78SJulian Elischer thread_unlink(struct thread *td)
1479d3a0bd78SJulian Elischer {
1480d3a0bd78SJulian Elischer 	struct proc *p = td->td_proc;
1481d3a0bd78SJulian Elischer 	struct ksegrp *kg = td->td_ksegrp;
1482d3a0bd78SJulian Elischer 
1483112afcb2SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
1484d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1485d3a0bd78SJulian Elischer 	p->p_numthreads--;
1486d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1487d3a0bd78SJulian Elischer 	kg->kg_numthreads--;
1488d3a0bd78SJulian Elischer 	/* could clear a few other things here */
1489d3a0bd78SJulian Elischer }
1490d3a0bd78SJulian Elischer 
14915215b187SJeff Roberson /*
14925215b187SJeff Roberson  * Purge a ksegrp resource. When a ksegrp is preparing to
14935215b187SJeff Roberson  * exit, it calls this function.
14945215b187SJeff Roberson  */
1495a6f37ac9SJohn Baldwin static void
14965215b187SJeff Roberson kse_purge_group(struct thread *td)
14975215b187SJeff Roberson {
14985215b187SJeff Roberson 	struct ksegrp *kg;
14995215b187SJeff Roberson 	struct kse *ke;
15005215b187SJeff Roberson 
15015215b187SJeff Roberson 	kg = td->td_ksegrp;
15025215b187SJeff Roberson  	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
15035215b187SJeff Roberson 	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
15045215b187SJeff Roberson 		KASSERT(ke->ke_state == KES_IDLE,
15055215b187SJeff Roberson 			("%s: wrong idle KSE state", __func__));
15065215b187SJeff Roberson 		kse_unlink(ke);
15075215b187SJeff Roberson 	}
15085215b187SJeff Roberson 	KASSERT((kg->kg_kses == 1),
15095215b187SJeff Roberson 		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
15105215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0),
15115215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall datas",
15125215b187SJeff Roberson 		__func__, kg->kg_numupcalls));
15135215b187SJeff Roberson }
15145215b187SJeff Roberson 
15155215b187SJeff Roberson /*
15165215b187SJeff Roberson  * Purge a process's KSE resource. When a process is preparing to
15175215b187SJeff Roberson  * exit, it calls kse_purge to release any extra KSE resources in
15185215b187SJeff Roberson  * the process.
15195215b187SJeff Roberson  */
1520a6f37ac9SJohn Baldwin static void
15215c8329edSJulian Elischer kse_purge(struct proc *p, struct thread *td)
15225c8329edSJulian Elischer {
15235c8329edSJulian Elischer 	struct ksegrp *kg;
15245215b187SJeff Roberson 	struct kse *ke;
15255c8329edSJulian Elischer 
15265c8329edSJulian Elischer  	KASSERT(p->p_numthreads == 1, ("bad thread number"));
15275c8329edSJulian Elischer 	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
15285c8329edSJulian Elischer 		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
15295c8329edSJulian Elischer 		p->p_numksegrps--;
15305215b187SJeff Roberson 		/*
15315215b187SJeff Roberson 		 * There is no ownership for KSEs; after all threads
15325215b187SJeff Roberson 		 * in the group have exited, some KSEs may have been
15335215b187SJeff Roberson 		 * left on the idle queue, so garbage-collect them now.
15345215b187SJeff Roberson 		 */
15355215b187SJeff Roberson 		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
15365215b187SJeff Roberson 			KASSERT(ke->ke_state == KES_IDLE,
15375215b187SJeff Roberson 			   ("%s: wrong idle KSE state", __func__));
15385215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
15395215b187SJeff Roberson 			kg->kg_idle_kses--;
15405215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
15415215b187SJeff Roberson 			kg->kg_kses--;
15425215b187SJeff Roberson 			kse_stash(ke);
15435215b187SJeff Roberson 		}
15445c8329edSJulian Elischer 		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
15455c8329edSJulian Elischer 		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
15465215b187SJeff Roberson 		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
15475215b187SJeff Roberson 		KASSERT((kg->kg_numupcalls == 0),
15485215b187SJeff Roberson 		        ("%s: ksegrp still has %d upcall datas",
15495215b187SJeff Roberson 			__func__, kg->kg_numupcalls));
15505215b187SJeff Roberson 
15515215b187SJeff Roberson 		if (kg != td->td_ksegrp)
15525c8329edSJulian Elischer 			ksegrp_stash(kg);
15535c8329edSJulian Elischer 	}
15545c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
15555c8329edSJulian Elischer 	p->p_numksegrps++;
15565c8329edSJulian Elischer }
15575c8329edSJulian Elischer 
15585215b187SJeff Roberson /*
15595215b187SJeff Roberson  * This function is intended to initialize a spare thread for an
15605215b187SJeff Roberson  * upcall. It initializes the thread's large data area outside
15615215b187SJeff Roberson  * sched_lock on behalf of thread_schedule_upcall().
15625215b187SJeff Roberson  */
15635215b187SJeff Roberson void
15645215b187SJeff Roberson thread_alloc_spare(struct thread *td, struct thread *spare)
15655215b187SJeff Roberson {
156637814395SPeter Wemm 
15675215b187SJeff Roberson 	if (td->td_standin)
15685215b187SJeff Roberson 		return;
1569fdcac928SMarcel Moolenaar 	if (spare == NULL) {
15705215b187SJeff Roberson 		spare = thread_alloc();
1571fdcac928SMarcel Moolenaar 		spare->td_tid = thread_new_tid();
1572fdcac928SMarcel Moolenaar 	}
15735215b187SJeff Roberson 	td->td_standin = spare;
15745215b187SJeff Roberson 	bzero(&spare->td_startzero,
15755215b187SJeff Roberson 	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
15765215b187SJeff Roberson 	spare->td_proc = td->td_proc;
15775215b187SJeff Roberson 	spare->td_ucred = crhold(td->td_ucred);
15785215b187SJeff Roberson }
15795c8329edSJulian Elischer 
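/*
 * Typical usage, sketched from the call sites later in this file:
 * callers pre-allocate the standin while it is still safe to sleep,
 *
 *	if (td->td_standin == NULL)
 *		thread_alloc_spare(td, NULL);
 *
 * so that thread_schedule_upcall() never has to allocate while
 * holding sched_lock.
 */
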
158044990b8cSJulian Elischer /*
1581c76e33b6SJonathan Mini  * Create a thread and schedule it for upcall on the KSE given.
158293a7aa79SJulian Elischer  * Use our thread's standin so that we don't have to allocate one.
158344990b8cSJulian Elischer  */
158444990b8cSJulian Elischer struct thread *
15855215b187SJeff Roberson thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
158644990b8cSJulian Elischer {
158744990b8cSJulian Elischer 	struct thread *td2;
158844990b8cSJulian Elischer 
158944990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
159048bfcdddSJulian Elischer 
159148bfcdddSJulian Elischer 	/*
15925215b187SJeff Roberson 	 * Schedule an upcall thread on the specified kse_upcall;
15935215b187SJeff Roberson 	 * the kse_upcall must be free and
15945215b187SJeff Roberson 	 * td must have a spare thread.
159548bfcdddSJulian Elischer 	 */
15965215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
159748bfcdddSJulian Elischer 	if ((td2 = td->td_standin) != NULL) {
159848bfcdddSJulian Elischer 		td->td_standin = NULL;
159944990b8cSJulian Elischer 	} else {
16005215b187SJeff Roberson 		panic("no reserve thread when scheduling an upcall");
160148bfcdddSJulian Elischer 		return (NULL);
160244990b8cSJulian Elischer 	}
160344990b8cSJulian Elischer 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
160448bfcdddSJulian Elischer 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
16051faf202eSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
16061faf202eSJulian Elischer 	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
16075215b187SJeff Roberson 	thread_link(td2, ku->ku_ksegrp);
160836f7b36fSDavid Xu 	/* inherit blocked thread's context */
160911e0f8e1SMarcel Moolenaar 	cpu_set_upcall(td2, td);
16105215b187SJeff Roberson 	/* Let the new thread become owner of the upcall */
16115215b187SJeff Roberson 	ku->ku_owner   = td2;
16125215b187SJeff Roberson 	td2->td_upcall = ku;
1613cd4f6ebbSDavid Xu 	td2->td_flags  = TDF_SA;
16141d5a24beSDavid Xu 	td2->td_pflags = TDP_UPCALLING;
16155215b187SJeff Roberson 	td2->td_kse    = NULL;
161648bfcdddSJulian Elischer 	td2->td_state  = TDS_CAN_RUN;
161748bfcdddSJulian Elischer 	td2->td_inhibitors = 0;
16189dde3bc9SDavid Xu 	SIGFILLSET(td2->td_sigmask);
16199dde3bc9SDavid Xu 	SIG_CANTMASK(td2->td_sigmask);
1620ab2baa72SDavid Xu 	sched_fork_thread(td, td2);
162148bfcdddSJulian Elischer 	return (td2);	/* bogus.. should be a void function */
162244990b8cSJulian Elischer }
162344990b8cSJulian Elischer 
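/*
 * (Editor's note on the ownership rule.) A kse_upcall has exactly one
 * owner thread at a time: thread_switchout() below clears ku_owner
 * before calling thread_schedule_upcall(), and the new thread becomes
 * the owner above.  Only the owner may consume or remove the upcall,
 * which is what makes the upcall_remove() call in thread_exit() safe.
 */
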
16249dde3bc9SDavid Xu /*
16259dde3bc9SDavid Xu  * This is only used when a thread has generated a trap and the
16269dde3bc9SDavid Xu  * process is being debugged.
16279dde3bc9SDavid Xu  */
162858a3c273SJeff Roberson void
162958a3c273SJeff Roberson thread_signal_add(struct thread *td, int sig)
1630c76e33b6SJonathan Mini {
163158a3c273SJeff Roberson 	struct proc *p;
16329dde3bc9SDavid Xu 	siginfo_t siginfo;
16339dde3bc9SDavid Xu 	struct sigacts *ps;
1634c76e33b6SJonathan Mini 	int error;
1635c76e33b6SJonathan Mini 
1636b0bd5f38SDavid Xu 	p = td->td_proc;
1637b0bd5f38SDavid Xu 	PROC_LOCK_ASSERT(p, MA_OWNED);
16389dde3bc9SDavid Xu 	ps = p->p_sigacts;
16399dde3bc9SDavid Xu 	mtx_assert(&ps->ps_mtx, MA_OWNED);
16409dde3bc9SDavid Xu 
16414b7d5d84SDavid Xu 	cpu_thread_siginfo(sig, 0, &siginfo);
16429dde3bc9SDavid Xu 	mtx_unlock(&ps->ps_mtx);
1643c76e33b6SJonathan Mini 	PROC_UNLOCK(p);
16449dde3bc9SDavid Xu 	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
16459dde3bc9SDavid Xu 	if (error) {
164658a3c273SJeff Roberson 		PROC_LOCK(p);
164758a3c273SJeff Roberson 		sigexit(td, SIGILL);
164858a3c273SJeff Roberson 	}
16499dde3bc9SDavid Xu 	PROC_LOCK(p);
16509dde3bc9SDavid Xu 	SIGADDSET(td->td_sigmask, sig);
16519dde3bc9SDavid Xu 	mtx_lock(&ps->ps_mtx);
1652c76e33b6SJonathan Mini }
1653c76e33b6SJonathan Mini 
16546ce75196SDavid Xu void
16556ce75196SDavid Xu thread_switchout(struct thread *td)
16566ce75196SDavid Xu {
16576ce75196SDavid Xu 	struct kse_upcall *ku;
1658ab78d4d6SDavid Xu 	struct thread *td2;
16596ce75196SDavid Xu 
16606ce75196SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
16616ce75196SDavid Xu 
16626ce75196SDavid Xu 	/*
16636ce75196SDavid Xu 	 * If the outgoing thread is in a threaded group and has never
16646ce75196SDavid Xu 	 * scheduled an upcall, decide whether this is a short
16656ce75196SDavid Xu 	 * or long term event and thus whether or not to schedule
16666ce75196SDavid Xu 	 * an upcall.
16676ce75196SDavid Xu 	 * If it is a short term event, just suspend it in
16686ce75196SDavid Xu 	 * a way that takes its KSE with it.
16696ce75196SDavid Xu 	 * Select the events for which we want to schedule upcalls.
16706ce75196SDavid Xu 	 * For now it's just sleep.
16716ce75196SDavid Xu 	 * XXXKSE eventually almost any inhibition could do.
16726ce75196SDavid Xu 	 */
16736ce75196SDavid Xu 	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
16746ce75196SDavid Xu 		/*
16756ce75196SDavid Xu 		 * Release ownership of upcall, and schedule an upcall
16766ce75196SDavid Xu 		 * thread, this new upcall thread becomes the owner of
16776ce75196SDavid Xu 		 * the upcall structure.
16786ce75196SDavid Xu 		 */
16796ce75196SDavid Xu 		ku = td->td_upcall;
16806ce75196SDavid Xu 		ku->ku_owner = NULL;
16816ce75196SDavid Xu 		td->td_upcall = NULL;
16826ce75196SDavid Xu 		td->td_flags &= ~TDF_CAN_UNBIND;
1683ab78d4d6SDavid Xu 		td2 = thread_schedule_upcall(td, ku);
1684ab78d4d6SDavid Xu 		setrunqueue(td2);
16856ce75196SDavid Xu 	}
16866ce75196SDavid Xu }
16876ce75196SDavid Xu 
1688c76e33b6SJonathan Mini /*
16895215b187SJeff Roberson  * Setup done on the thread when it enters the kernel.
16901434d3feSJulian Elischer  * XXXKSE Presently only for syscalls but eventually all kernel entries.
16911434d3feSJulian Elischer  */
16921434d3feSJulian Elischer void
16931434d3feSJulian Elischer thread_user_enter(struct proc *p, struct thread *td)
16941434d3feSJulian Elischer {
16955215b187SJeff Roberson 	struct ksegrp *kg;
16965215b187SJeff Roberson 	struct kse_upcall *ku;
16971ecb38a3SDavid Xu 	struct kse_thr_mailbox *tmbx;
1698d3b5e418SDavid Xu 	uint32_t tflags;
16991434d3feSJulian Elischer 
17005215b187SJeff Roberson 	kg = td->td_ksegrp;
17011ecb38a3SDavid Xu 
17021434d3feSJulian Elischer 	/*
17031434d3feSJulian Elischer 	 * First check whether we should just abort,
17041434d3feSJulian Elischer 	 * but check whether we are the single thread first!
17051434d3feSJulian Elischer 	 */
1706cd4f6ebbSDavid Xu 	if (p->p_flag & P_SINGLE_EXIT) {
17071434d3feSJulian Elischer 		PROC_LOCK(p);
17081434d3feSJulian Elischer 		mtx_lock_spin(&sched_lock);
1709e574e444SDavid Xu 		thread_stopped(p);
17101434d3feSJulian Elischer 		thread_exit();
17111434d3feSJulian Elischer 		/* NOTREACHED */
17121434d3feSJulian Elischer 	}
17131434d3feSJulian Elischer 
17141434d3feSJulian Elischer 	/*
17151434d3feSJulian Elischer 	 * If we are doing a syscall in a KSE environment,
17161434d3feSJulian Elischer 	 * note where our mailbox is. There is always the
171793a7aa79SJulian Elischer 	 * possibility that we could do this lazily (in kse_reassign()),
17181434d3feSJulian Elischer 	 * but for now do it every time.
17191434d3feSJulian Elischer 	 */
17205215b187SJeff Roberson 	kg = td->td_ksegrp;
1721cd4f6ebbSDavid Xu 	if (td->td_flags & TDF_SA) {
17225215b187SJeff Roberson 		ku = td->td_upcall;
17235215b187SJeff Roberson 		KASSERT(ku, ("%s: no upcall owned", __func__));
17245215b187SJeff Roberson 		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
17251ecb38a3SDavid Xu 		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
17264e4422d4SMarcel Moolenaar 		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
17271ecb38a3SDavid Xu 		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
1728d3b5e418SDavid Xu 		if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
1729d3b5e418SDavid Xu 		    (ku->ku_mflags & KMF_NOUPCALL)) {
1730d3b5e418SDavid Xu 			td->td_mailbox = NULL;
1731d3b5e418SDavid Xu 		} else {
1732d3b5e418SDavid Xu 			if (td->td_standin == NULL)
1733d3b5e418SDavid Xu 				thread_alloc_spare(td, NULL);
1734ab908f59SDaniel Eischen 			tflags = fuword32(&tmbx->tm_flags);
1735d3b5e418SDavid Xu 			/*
1736d3b5e418SDavid Xu 			 * On some architectures the TP register points to the
1737d3b5e418SDavid Xu 			 * thread mailbox but not to the kse mailbox; userland
1738d3b5e418SDavid Xu 			 * cannot atomically clear km_curthread but can use the
1739d3b5e418SDavid Xu 			 * TP register and set TMF_NOUPCALL in the thread
1740d3b5e418SDavid Xu 			 * flags to indicate a critical region.
1741d3b5e418SDavid Xu 			 */
1742d3b5e418SDavid Xu 			if (tflags & TMF_NOUPCALL) {
17435215b187SJeff Roberson 				td->td_mailbox = NULL;
17448798d4f9SDavid Xu 			} else {
17451ecb38a3SDavid Xu 				td->td_mailbox = tmbx;
17468798d4f9SDavid Xu 				mtx_lock_spin(&sched_lock);
174793a7aa79SJulian Elischer 				td->td_flags |= TDF_CAN_UNBIND;
17488798d4f9SDavid Xu 				mtx_unlock_spin(&sched_lock);
17495215b187SJeff Roberson 			}
17501434d3feSJulian Elischer 		}
17511434d3feSJulian Elischer 	}
1752d3b5e418SDavid Xu }
17531434d3feSJulian Elischer 
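/*
 * (Editor's sketch of the km_curthread handshake above.) Userland
 * publishes its current thread mailbox in km_curthread; the kernel
 * fetches it with fuword() on entry and, when an upcall is delivered,
 * clears it with suword(&km_curthread, 0) in thread_userret() so the
 * UTS knows the slot is free again.  A NULL (or failed, -1) fetch
 * leaves the thread unbound, as for the UTS itself.
 */
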
17541434d3feSJulian Elischer /*
1755c76e33b6SJonathan Mini  * The extra work we go through if we are a threaded process when we
1756c76e33b6SJonathan Mini  * return to userland.
1757c76e33b6SJonathan Mini  *
1758c76e33b6SJonathan Mini  * If we are a KSE process and returning to user mode, check for
1759c76e33b6SJonathan Mini  * extra work to do before we return (e.g. for more syscalls
1760c76e33b6SJonathan Mini  * to complete first).  If we were in a critical section, we should
1761c76e33b6SJonathan Mini  * just return to let it finish. Same if we were in the UTS (in
1762c76e33b6SJonathan Mini  * which case the mailbox's context's busy indicator will be set).
1763c76e33b6SJonathan Mini  * The only traps we support will have set the mailbox.
1764c76e33b6SJonathan Mini  * We will clear it here.
176544990b8cSJulian Elischer  */
1766c76e33b6SJonathan Mini int
1767253fdd5bSJulian Elischer thread_userret(struct thread *td, struct trapframe *frame)
1768c76e33b6SJonathan Mini {
17691ecb38a3SDavid Xu 	int error = 0, upcalls, uts_crit;
17705215b187SJeff Roberson 	struct kse_upcall *ku;
17710252d203SDavid Xu 	struct ksegrp *kg, *kg2;
177248bfcdddSJulian Elischer 	struct proc *p;
1773bfd83250SDavid Xu 	struct timespec ts;
1774c76e33b6SJonathan Mini 
17756f8132a8SJulian Elischer 	p = td->td_proc;
17765215b187SJeff Roberson 	kg = td->td_ksegrp;
1777cd4f6ebbSDavid Xu 	ku = td->td_upcall;
177893a7aa79SJulian Elischer 
1779cd4f6ebbSDavid Xu 	/* Nothing to do with bound thread */
1780cd4f6ebbSDavid Xu 	if (!(td->td_flags & TDF_SA))
17815215b187SJeff Roberson 		return (0);
17825215b187SJeff Roberson 
17835215b187SJeff Roberson 	/*
17845215b187SJeff Roberson 	 * A stat clock interrupt hit in userland and we are
17855215b187SJeff Roberson 	 * returning from the interrupt; charge the thread's
17865215b187SJeff Roberson 	 * userland time to the UTS.
17875215b187SJeff Roberson 	 */
17885215b187SJeff Roberson 	if (td->td_flags & TDF_USTATCLOCK) {
17894b4866edSDavid Xu 		thread_update_usr_ticks(td, 1);
179093a7aa79SJulian Elischer 		mtx_lock_spin(&sched_lock);
17915215b187SJeff Roberson 		td->td_flags &= ~TDF_USTATCLOCK;
17920dbb100bSDavid Xu 		mtx_unlock_spin(&sched_lock);
17934b4866edSDavid Xu 		if (kg->kg_completed ||
17944b4866edSDavid Xu 		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
17954b4866edSDavid Xu 			thread_user_enter(p, td);
17965215b187SJeff Roberson 	}
17975215b187SJeff Roberson 
17981ecb38a3SDavid Xu 	uts_crit = (td->td_mailbox == NULL);
17995215b187SJeff Roberson 	/*
18005215b187SJeff Roberson 	 * Optimisation:
18015215b187SJeff Roberson 	 * This thread has not started any upcall.
18025215b187SJeff Roberson 	 * If there is no work to report other than ourselves,
18035215b187SJeff Roberson 	 * then it can return directly to userland.
18045215b187SJeff Roberson 	 */
18055215b187SJeff Roberson 	if (TD_CAN_UNBIND(td)) {
18065215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
18075215b187SJeff Roberson 		td->td_flags &= ~TDF_CAN_UNBIND;
18084093529dSJeff Roberson 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
180921e0492aSDavid Xu 		    (kg->kg_completed == NULL) &&
18106ce75196SDavid Xu 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
181195bee4c3SDavid Xu 		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
18124093529dSJeff Roberson 			mtx_unlock_spin(&sched_lock);
18134b4866edSDavid Xu 			thread_update_usr_ticks(td, 0);
18149a4b78c9SDavid Xu 			nanotime(&ts);
18159a4b78c9SDavid Xu 			error = copyout(&ts,
18169a4b78c9SDavid Xu 				(caddr_t)&ku->ku_mailbox->km_timeofday,
18179a4b78c9SDavid Xu 				sizeof(ts));
181821e0492aSDavid Xu 			td->td_mailbox = 0;
18191ecb38a3SDavid Xu 			ku->ku_mflags = 0;
18209a4b78c9SDavid Xu 			if (error)
18219a4b78c9SDavid Xu 				goto out;
182293a7aa79SJulian Elischer 			return (0);
182393a7aa79SJulian Elischer 		}
18244093529dSJeff Roberson 		mtx_unlock_spin(&sched_lock);
1825dd7da9aaSDavid Xu 		thread_export_context(td, 0);
182693a7aa79SJulian Elischer 		/*
18275215b187SJeff Roberson 		 * There is something to report, and we own an upcall
18285215b187SJeff Roberson 		 * structure, so we can go to userland.
18295215b187SJeff Roberson 		 * Turn ourselves into an upcall thread.
183093a7aa79SJulian Elischer 		 */
18311d5a24beSDavid Xu 		td->td_pflags |= TDP_UPCALLING;
18321ecb38a3SDavid Xu 	} else if (td->td_mailbox && (ku == NULL)) {
1833dd7da9aaSDavid Xu 		thread_export_context(td, 1);
1834e574e444SDavid Xu 		PROC_LOCK(p);
18356f8132a8SJulian Elischer 		/*
18365215b187SJeff Roberson 		 * There are upcall threads waiting for
18375215b187SJeff Roberson 		 * work to do, wake one of them up.
18385215b187SJeff Roberson 		 * XXXKSE Maybe wake all of them up.
18396f8132a8SJulian Elischer 		 */
1840dd7da9aaSDavid Xu 		if (kg->kg_upsleeps)
18415215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
1842e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1843e574e444SDavid Xu 		thread_stopped(p);
184493a7aa79SJulian Elischer 		thread_exit();
18455215b187SJeff Roberson 		/* NOTREACHED */
184648bfcdddSJulian Elischer 	}
184793a7aa79SJulian Elischer 
1848cd4f6ebbSDavid Xu 	KASSERT(ku != NULL, ("upcall is NULL\n"));
1849a87891eeSDavid Xu 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1850a87891eeSDavid Xu 
1851a87891eeSDavid Xu 	if (p->p_numthreads > max_threads_per_proc) {
1852a87891eeSDavid Xu 		max_threads_hits++;
1853a87891eeSDavid Xu 		PROC_LOCK(p);
1854112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
18557677ce18SDavid Xu 		p->p_maxthrwaits++;
1856a87891eeSDavid Xu 		while (p->p_numthreads > max_threads_per_proc) {
1857a87891eeSDavid Xu 			upcalls = 0;
1858a87891eeSDavid Xu 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1859a87891eeSDavid Xu 				if (kg2->kg_numupcalls == 0)
1860a87891eeSDavid Xu 					upcalls++;
1861a87891eeSDavid Xu 				else
1862a87891eeSDavid Xu 					upcalls += kg2->kg_numupcalls;
1863a87891eeSDavid Xu 			}
1864a87891eeSDavid Xu 			if (upcalls >= max_threads_per_proc)
1865a87891eeSDavid Xu 				break;
18665073e68fSDavid Xu 			mtx_unlock_spin(&sched_lock);
186736407becSDavid Xu 			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
186855cdddc0SPeter Wemm 			    "maxthreads", 0)) {
18697677ce18SDavid Xu 				mtx_lock_spin(&sched_lock);
187036407becSDavid Xu 				break;
18717677ce18SDavid Xu 			} else {
1872112afcb2SJohn Baldwin 				mtx_lock_spin(&sched_lock);
1873a87891eeSDavid Xu 			}
18747677ce18SDavid Xu 		}
18757677ce18SDavid Xu 		p->p_maxthrwaits--;
1876112afcb2SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
1877a87891eeSDavid Xu 		PROC_UNLOCK(p);
1878a87891eeSDavid Xu 	}
1879a87891eeSDavid Xu 
18801d5a24beSDavid Xu 	if (td->td_pflags & TDP_UPCALLING) {
18811ecb38a3SDavid Xu 		uts_crit = 0;
18826ce75196SDavid Xu 		kg->kg_nextupcall = ticks+kg->kg_upquantum;
188348bfcdddSJulian Elischer 		/*
188444990b8cSJulian Elischer 		 * There is no more work to do and we are going to ride
18855215b187SJeff Roberson 		 * this thread up to userland as an upcall.
188648bfcdddSJulian Elischer 		 * Do the last parts of the setup needed for the upcall.
188744990b8cSJulian Elischer 		 */
1888c76e33b6SJonathan Mini 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1889ed32df81SJulian Elischer 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1890c76e33b6SJonathan Mini 
18911d5a24beSDavid Xu 		td->td_pflags &= ~TDP_UPCALLING;
1892cd4f6ebbSDavid Xu 		if (ku->ku_flags & KUF_DOUPCALL) {
18935215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
18945215b187SJeff Roberson 			ku->ku_flags &= ~KUF_DOUPCALL;
18955215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
1896cd4f6ebbSDavid Xu 		}
1897c76e33b6SJonathan Mini 		/*
18981ecb38a3SDavid Xu 		 * Set user context to the UTS
18991ecb38a3SDavid Xu 		 */
19001ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
19011ecb38a3SDavid Xu 			cpu_set_upcall_kse(td, ku);
19021ecb38a3SDavid Xu 			error = suword(&ku->ku_mailbox->km_curthread, 0);
19031ecb38a3SDavid Xu 			if (error)
19041ecb38a3SDavid Xu 				goto out;
19051ecb38a3SDavid Xu 		}
19061ecb38a3SDavid Xu 
19071ecb38a3SDavid Xu 		/*
190893a7aa79SJulian Elischer 		 * Unhook the list of completed threads.
190993a7aa79SJulian Elischer 		 * Anything that completes after this gets to
191093a7aa79SJulian Elischer 		 * come in next time.
191193a7aa79SJulian Elischer 		 * Put the list of completed thread mailboxes on
191293a7aa79SJulian Elischer 		 * this KSE's mailbox.
1913c76e33b6SJonathan Mini 		 */
19141ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
19151ecb38a3SDavid Xu 		    (error = thread_link_mboxes(kg, ku)) != 0)
19160252d203SDavid Xu 			goto out;
19171ecb38a3SDavid Xu 	}
19181ecb38a3SDavid Xu 	if (!uts_crit) {
1919bfd83250SDavid Xu 		nanotime(&ts);
19201ecb38a3SDavid Xu 		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1921bfd83250SDavid Xu 	}
19220252d203SDavid Xu 
19230252d203SDavid Xu out:
19240252d203SDavid Xu 	if (error) {
19253d0586d4SJulian Elischer 		/*
1926fc8cdd87SDavid Xu 		 * Things are going to be so screwed we should just kill
1927fc8cdd87SDavid Xu 		 * the process.
19283d0586d4SJulian Elischer 		 * How do we do that?
19293d0586d4SJulian Elischer 		 */
193048bfcdddSJulian Elischer 		PROC_LOCK(td->td_proc);
193148bfcdddSJulian Elischer 		psignal(td->td_proc, SIGSEGV);
193248bfcdddSJulian Elischer 		PROC_UNLOCK(td->td_proc);
19330252d203SDavid Xu 	} else {
19340252d203SDavid Xu 		/*
19350252d203SDavid Xu 		 * Optimisation:
19360252d203SDavid Xu 		 * Ensure that we have a spare thread available,
19370252d203SDavid Xu 		 * for when we re-enter the kernel.
19380252d203SDavid Xu 		 */
19390252d203SDavid Xu 		if (td->td_standin == NULL)
19400252d203SDavid Xu 			thread_alloc_spare(td, NULL);
19410252d203SDavid Xu 	}
19420252d203SDavid Xu 
19431ecb38a3SDavid Xu 	ku->ku_mflags = 0;
19440252d203SDavid Xu 	/*
19450252d203SDavid Xu 	 * Clear the thread mailbox first, then clear the system tick count.
19460252d203SDavid Xu 	 * The order is important because thread_statclock() uses the
19470252d203SDavid Xu 	 * mailbox pointer to see whether it is a userland thread or
19480252d203SDavid Xu 	 * a UTS kernel thread.
19490252d203SDavid Xu 	 */
195093a7aa79SJulian Elischer 	td->td_mailbox = NULL;
19515215b187SJeff Roberson 	td->td_usticks = 0;
195248bfcdddSJulian Elischer 	return (error);	/* go sync */
195344990b8cSJulian Elischer }
195444990b8cSJulian Elischer 
195544990b8cSJulian Elischer /*
195644990b8cSJulian Elischer  * Enforce single-threading.
195744990b8cSJulian Elischer  *
195844990b8cSJulian Elischer  * Returns 1 if the caller must abort (another thread is waiting to
195944990b8cSJulian Elischer  * exit the process or similar). Process is locked!
196044990b8cSJulian Elischer  * Returns 0 when you are successfully the only thread running.
196144990b8cSJulian Elischer  * A process has successfully single threaded in the suspend mode when
196244990b8cSJulian Elischer  * there are no threads in user mode. Threads in the kernel must be
196344990b8cSJulian Elischer  * allowed to continue until they get to the user boundary. They may even
196444990b8cSJulian Elischer  * copy out their return values and data before suspending. They may however be
196544990b8cSJulian Elischer  * accelerated in reaching the user boundary as we will wake up
196644990b8cSJulian Elischer  * any sleeping threads that are interruptible (PCATCH).
196744990b8cSJulian Elischer  */
196844990b8cSJulian Elischer int
196944990b8cSJulian Elischer thread_single(int force_exit)
197044990b8cSJulian Elischer {
197144990b8cSJulian Elischer 	struct thread *td;
197244990b8cSJulian Elischer 	struct thread *td2;
197344990b8cSJulian Elischer 	struct proc *p;
197444990b8cSJulian Elischer 
197544990b8cSJulian Elischer 	td = curthread;
197644990b8cSJulian Elischer 	p = td->td_proc;
197737814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
197844990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
197944990b8cSJulian Elischer 	KASSERT((td != NULL), ("curthread is NULL"));
198044990b8cSJulian Elischer 
19810e2a4d3aSDavid Xu 	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
198244990b8cSJulian Elischer 		return (0);
198344990b8cSJulian Elischer 
1984e3b9bf71SJulian Elischer 	/* Is someone already single threading? */
1985e3b9bf71SJulian Elischer 	if (p->p_singlethread)
198644990b8cSJulian Elischer 		return (1);
198744990b8cSJulian Elischer 
198893a7aa79SJulian Elischer 	if (force_exit == SINGLE_EXIT) {
198944990b8cSJulian Elischer 		p->p_flag |= P_SINGLE_EXIT;
199093a7aa79SJulian Elischer 	} else
199144990b8cSJulian Elischer 		p->p_flag &= ~P_SINGLE_EXIT;
19921279572aSDavid Xu 	p->p_flag |= P_STOPPED_SINGLE;
199371fad9fdSJulian Elischer 	mtx_lock_spin(&sched_lock);
1994112afcb2SJohn Baldwin 	p->p_singlethread = td;
1995112afcb2SJohn Baldwin 	while ((p->p_numthreads - p->p_suspcount) != 1) {
199644990b8cSJulian Elischer 		FOREACH_THREAD_IN_PROC(p, td2) {
199744990b8cSJulian Elischer 			if (td2 == td)
199844990b8cSJulian Elischer 				continue;
1999588257e8SDavid Xu 			td2->td_flags |= TDF_ASTPENDING;
200071fad9fdSJulian Elischer 			if (TD_IS_INHIBITED(td2)) {
20011279572aSDavid Xu 				if (force_exit == SINGLE_EXIT) {
20029d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2)) {
200371fad9fdSJulian Elischer 						thread_unsuspend_one(td2);
200471fad9fdSJulian Elischer 					}
200533862f40SDavid Xu 					if (TD_ON_SLEEPQ(td2) &&
200633862f40SDavid Xu 					    (td2->td_flags & TDF_SINTR)) {
200744f3b092SJohn Baldwin 						sleepq_abort(td2);
200871fad9fdSJulian Elischer 					}
20099d102777SJulian Elischer 				} else {
20109d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2))
20119d102777SJulian Elischer 						continue;
20125215b187SJeff Roberson 					/*
20135215b187SJeff Roberson 					 * Maybe other inhibited states too?
20145215b187SJeff Roberson 					 * XXXKSE Is it totally safe to
20155215b187SJeff Roberson 					 * suspend a non-interruptible thread?
20165215b187SJeff Roberson 					 */
201793a7aa79SJulian Elischer 					if (td2->td_inhibitors &
20185215b187SJeff Roberson 					    (TDI_SLEEPING | TDI_SWAPPED))
20199d102777SJulian Elischer 						thread_suspend_one(td2);
202044990b8cSJulian Elischer 				}
202144990b8cSJulian Elischer 			}
20229d102777SJulian Elischer 		}
20239d102777SJulian Elischer 		/*
20249d102777SJulian Elischer 		 * Maybe we suspended some threads.. was it enough?
20259d102777SJulian Elischer 		 */
2026112afcb2SJohn Baldwin 		if ((p->p_numthreads - p->p_suspcount) == 1)
20279d102777SJulian Elischer 			break;
20289d102777SJulian Elischer 
202944990b8cSJulian Elischer 		/*
203044990b8cSJulian Elischer 		 * Wake us up when everyone else has suspended.
2031e3b9bf71SJulian Elischer 		 * In the meantime we suspend as well.
203244990b8cSJulian Elischer 		 */
203371fad9fdSJulian Elischer 		thread_suspend_one(td);
203444990b8cSJulian Elischer 		PROC_UNLOCK(p);
203529bcc451SJeff Roberson 		mi_switch(SW_VOL);
203644990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
203744990b8cSJulian Elischer 		PROC_LOCK(p);
2038112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
203944990b8cSJulian Elischer 	}
20405215b187SJeff Roberson 	if (force_exit == SINGLE_EXIT) {
2041112afcb2SJohn Baldwin 		if (td->td_upcall)
20425215b187SJeff Roberson 			upcall_remove(td);
20435c8329edSJulian Elischer 		kse_purge(p, td);
20445215b187SJeff Roberson 	}
2045112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
204644990b8cSJulian Elischer 	return (0);
204744990b8cSJulian Elischer }
204844990b8cSJulian Elischer 
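/*
 * (Editor's note.) The loop in thread_single() above terminates when
 *
 *	p->p_numthreads - p->p_suspcount == 1
 *
 * i.e. every thread but the caller is suspended.  Threads still in
 * the kernel are nudged toward the user boundary via TDF_ASTPENDING
 * and park themselves in thread_suspend_check() below.
 */
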
204944990b8cSJulian Elischer /*
205044990b8cSJulian Elischer  * Called in from locations that can safely check to see
205144990b8cSJulian Elischer  * whether we have to suspend or at least throttle for a
205244990b8cSJulian Elischer  * single-thread event (e.g. fork).
205344990b8cSJulian Elischer  *
205444990b8cSJulian Elischer  * Such locations include userret().
205544990b8cSJulian Elischer  * If the "return_instead" argument is non-zero, the thread must be able to
205644990b8cSJulian Elischer  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
205744990b8cSJulian Elischer  *
205844990b8cSJulian Elischer  * The 'return_instead' argument tells the function if it may do a
205944990b8cSJulian Elischer  * thread_exit() or suspend, or whether the caller must abort and back
206044990b8cSJulian Elischer  * out instead.
206144990b8cSJulian Elischer  *
206244990b8cSJulian Elischer  * If the thread that set the single_threading request has set the
206344990b8cSJulian Elischer  * P_SINGLE_EXIT bit in the process flags then this call will never return
206444990b8cSJulian Elischer  * if 'return_instead' is false, but will exit.
206544990b8cSJulian Elischer  *
206644990b8cSJulian Elischer  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
206744990b8cSJulian Elischer  *---------------+--------------------+---------------------
206844990b8cSJulian Elischer  *       0       | returns 0          |   returns 0 or 1
206944990b8cSJulian Elischer  *               | when ST ends       |   immediately
207044990b8cSJulian Elischer  *---------------+--------------------+---------------------
207144990b8cSJulian Elischer  *       1       | thread exits       |   returns 1
207244990b8cSJulian Elischer  *               |                    |  immediately
207344990b8cSJulian Elischer  * 0 = thread_exit() or suspension ok,
207444990b8cSJulian Elischer  * other = return error instead of stopping the thread.
207544990b8cSJulian Elischer  *
207644990b8cSJulian Elischer  * While a full suspension is under effect, even a single threading
207744990b8cSJulian Elischer  * thread would be suspended if it made this call (but it shouldn't).
207844990b8cSJulian Elischer  * This call should only be made from places where
207944990b8cSJulian Elischer  * thread_exit() would be safe as that may be the outcome unless
208044990b8cSJulian Elischer  * return_instead is set.
208144990b8cSJulian Elischer  */
208244990b8cSJulian Elischer int
208344990b8cSJulian Elischer thread_suspend_check(int return_instead)
208444990b8cSJulian Elischer {
2085ecafb24bSJuli Mallett 	struct thread *td;
2086ecafb24bSJuli Mallett 	struct proc *p;
208744990b8cSJulian Elischer 
208844990b8cSJulian Elischer 	td = curthread;
208944990b8cSJulian Elischer 	p = td->td_proc;
209037814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
209144990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
209244990b8cSJulian Elischer 	while (P_SHOULDSTOP(p)) {
20931279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
209444990b8cSJulian Elischer 			KASSERT(p->p_singlethread != NULL,
209544990b8cSJulian Elischer 			    ("singlethread not set"));
209644990b8cSJulian Elischer 			/*
2097e3b9bf71SJulian Elischer 			 * The only suspension in action is
2098e3b9bf71SJulian Elischer 			 * single-threading. The single threader need not stop.
2099b6d5995eSJulian Elischer 			 * XXX Should be safe to access unlocked
2100b6d5995eSJulian Elischer 			 * as it can only be set to be true by us.
210144990b8cSJulian Elischer 			 */
2102e3b9bf71SJulian Elischer 			if (p->p_singlethread == td)
210344990b8cSJulian Elischer 				return (0);	/* Exempt from stopping. */
210444990b8cSJulian Elischer 		}
2105e3b9bf71SJulian Elischer 		if (return_instead)
210644990b8cSJulian Elischer 			return (1);
210744990b8cSJulian Elischer 
2108e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
2109e574e444SDavid Xu 		thread_stopped(p);
211044990b8cSJulian Elischer 		/*
211144990b8cSJulian Elischer 		 * If the process is waiting for us to exit,
211244990b8cSJulian Elischer 		 * this thread should just suicide.
21131279572aSDavid Xu 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
211444990b8cSJulian Elischer 		 */
211544990b8cSJulian Elischer 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
21160e2a4d3aSDavid Xu 			if (p->p_flag & P_SA)
211744990b8cSJulian Elischer 				thread_exit();
21182c10d16aSJeff Roberson 			else
21192c10d16aSJeff Roberson 				thr_exit1();
212044990b8cSJulian Elischer 		}
212144990b8cSJulian Elischer 
212244990b8cSJulian Elischer 		/*
212344990b8cSJulian Elischer 		 * When a thread suspends, it just
212444990b8cSJulian Elischer 		 * moves to the process's suspend queue
212544990b8cSJulian Elischer 		 * and stays there.
212644990b8cSJulian Elischer 		 */
212771fad9fdSJulian Elischer 		thread_suspend_one(td);
21281279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2129cf19bf91SJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
213071fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
2131cf19bf91SJulian Elischer 			}
2132cf19bf91SJulian Elischer 		}
2133a6f37ac9SJohn Baldwin 		PROC_UNLOCK(p);
213429bcc451SJeff Roberson 		mi_switch(SW_INVOL);
213544990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
213644990b8cSJulian Elischer 		PROC_LOCK(p);
213744990b8cSJulian Elischer 	}
213844990b8cSJulian Elischer 	return (0);
213944990b8cSJulian Elischer }
214044990b8cSJulian Elischer 
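/*
 * Sketched usage of thread_suspend_check() (the EINTR mapping below is
 * a hypothetical caller policy, not taken from this file): code at a
 * safe stopping point polls with the proc lock held,
 *
 *	PROC_LOCK(p);
 *	error = thread_suspend_check(1);
 *	PROC_UNLOCK(p);
 *	if (error)
 *		return (EINTR);
 *
 * passing return_instead != 0 when the caller cannot suspend in place.
 */
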
214135c32a76SDavid Xu void
214235c32a76SDavid Xu thread_suspend_one(struct thread *td)
214335c32a76SDavid Xu {
214435c32a76SDavid Xu 	struct proc *p = td->td_proc;
214535c32a76SDavid Xu 
214635c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2147112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
2148e574e444SDavid Xu 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
214935c32a76SDavid Xu 	p->p_suspcount++;
215071fad9fdSJulian Elischer 	TD_SET_SUSPENDED(td);
215135c32a76SDavid Xu 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
215271fad9fdSJulian Elischer 	/*
215371fad9fdSJulian Elischer 	 * Hack: If we are suspending but are on the sleep queue
215471fad9fdSJulian Elischer 	 * then we are in msleep or the cv equivalent. We
215571fad9fdSJulian Elischer 	 * want to look like we have two inhibitors.
21569d102777SJulian Elischer 	 * It may already be set; that doesn't matter.
215771fad9fdSJulian Elischer 	 */
215871fad9fdSJulian Elischer 	if (TD_ON_SLEEPQ(td))
215971fad9fdSJulian Elischer 		TD_SET_SLEEPING(td);
216035c32a76SDavid Xu }
216135c32a76SDavid Xu 
216235c32a76SDavid Xu void
216335c32a76SDavid Xu thread_unsuspend_one(struct thread *td)
216435c32a76SDavid Xu {
216535c32a76SDavid Xu 	struct proc *p = td->td_proc;
216635c32a76SDavid Xu 
216735c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2168112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
216935c32a76SDavid Xu 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
217071fad9fdSJulian Elischer 	TD_CLR_SUSPENDED(td);
217135c32a76SDavid Xu 	p->p_suspcount--;
217271fad9fdSJulian Elischer 	setrunnable(td);
217335c32a76SDavid Xu }
217435c32a76SDavid Xu 
217544990b8cSJulian Elischer /*
217644990b8cSJulian Elischer  * Allow all threads blocked by single threading to continue running.
217744990b8cSJulian Elischer  */
217844990b8cSJulian Elischer void
217944990b8cSJulian Elischer thread_unsuspend(struct proc *p)
218044990b8cSJulian Elischer {
218144990b8cSJulian Elischer 	struct thread *td;
218244990b8cSJulian Elischer 
2183b6d5995eSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
218444990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
218544990b8cSJulian Elischer 	if (!P_SHOULDSTOP(p)) {
218644990b8cSJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
218735c32a76SDavid Xu 			thread_unsuspend_one(td);
218844990b8cSJulian Elischer 		}
21891279572aSDavid Xu 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
219044990b8cSJulian Elischer 	    (p->p_numthreads == p->p_suspcount)) {
219144990b8cSJulian Elischer 		/*
219244990b8cSJulian Elischer 		 * Stopping everything also did the job for the single
219344990b8cSJulian Elischer 		 * threading request. Now we've downgraded to single-threaded,
219444990b8cSJulian Elischer 		 * let it continue.
219544990b8cSJulian Elischer 		 */
219635c32a76SDavid Xu 		thread_unsuspend_one(p->p_singlethread);
219744990b8cSJulian Elischer 	}
219844990b8cSJulian Elischer }
219944990b8cSJulian Elischer 
220044990b8cSJulian Elischer void
220144990b8cSJulian Elischer thread_single_end(void)
220244990b8cSJulian Elischer {
220344990b8cSJulian Elischer 	struct thread *td;
220444990b8cSJulian Elischer 	struct proc *p;
220544990b8cSJulian Elischer 
220644990b8cSJulian Elischer 	td = curthread;
220744990b8cSJulian Elischer 	p = td->td_proc;
220844990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
22091279572aSDavid Xu 	p->p_flag &= ~P_STOPPED_SINGLE;
2210112afcb2SJohn Baldwin 	mtx_lock_spin(&sched_lock);
221144990b8cSJulian Elischer 	p->p_singlethread = NULL;
221249539972SJulian Elischer 	/*
221349539972SJulian Elischer 	 * If there are other threads they may now run,
221449539972SJulian Elischer 	 * unless of course there is a blanket 'stop order'
221549539972SJulian Elischer 	 * on the process. The single threader must be allowed
221649539972SJulian Elischer 	 * to continue, however, as this is a bad place to stop.
221749539972SJulian Elischer 	 */
221849539972SJulian Elischer 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
221949539972SJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
222071fad9fdSJulian Elischer 			thread_unsuspend_one(td);
222144990b8cSJulian Elischer 		}
222249539972SJulian Elischer 	}
2223112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
222449539972SJulian Elischer }
2225