/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/turnstile.h>
#include <sys/user.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP-related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 150;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 50;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;

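/*
 * RANGEOF() yields the size in bytes of the member range [start, end)
 * within a structure; it is used with the startzero/endzero and
 * startcopy/endcopy marker members to bzero() or bcopy() whole
 * sections of ksegrps and KSEs at once.
 */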
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

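/*
 * Threads, KSEs, ksegrps and upcall structures that have exited but
 * cannot free their own storage are parked on these zombie queues,
 * under kse_zombie_lock, until thread_reap() frees them.
 */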
TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
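/*
 * Example (hypothetical invocation, from a root shell):
 *	sysctl kern.threads.virtual_cpu=4
 * This caps the number of upcalls each ksegrp may create; see the
 * ncpus logic in kse_create() below.
 */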

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu	= NOCPU;
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
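	/*
	 * Scheduler-private data lives immediately after the thread
	 * structure; the zone was sized via sched_sizeof_thread() in
	 * threadinit().  kse_init() and ksegrp_init() below rely on
	 * the same layout trick.
	 */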
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its KSE group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = NULL;
	}
}

/*
 * For a newly created process, link up all the structures and its
 * initial thread, etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

#ifndef _SYS_SYSPROTO_H_
struct kse_switchin_args {
	const struct __mcontext *mcp;
	long val;
	long *loc;
};
#endif

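/*
 * Switch the calling thread to the user context read from uap->mcp,
 * optionally storing uap->val at userland address uap->loc first.
 * The caller is presumably the UTS resuming a saved thread context
 * (an assumption; the syscall itself only performs the store and the
 * context load).  EJUSTRETURN keeps the syscall return path from
 * clobbering the freshly loaded register state.
 */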
int
kse_switchin(struct thread *td, struct kse_switchin_args *uap)
{
	mcontext_t mc;
	int error;

	error = (uap->mcp == NULL) ? EINVAL : 0;
	if (!error)
		error = copyin(uap->mcp, &mc, sizeof(mc));
	if (!error && uap->loc != NULL)
		error = (suword(uap->loc, uap->val) != 0) ? EINVAL : 0;
	if (!error)
		error = set_mcontext(td, &mc);
	return ((error == 0) ? EJUSTRETURN : error);
}

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox * tmbx;
	int cmd;
	long data;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;

	if (!(p->p_flag & P_SA))
		return (EINVAL);

	switch (uap->cmd) {
	case KSE_INTR_SENDSIG:
		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
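		/* FALLTHROUGH */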
	case KSE_INTR_INTERRUPT:
	case KSE_INTR_RESTART:
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2->td_mailbox == uap->tmbx)
				break;
		}
		if (td2 == NULL) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (ESRCH);
		}
		if (uap->cmd == KSE_INTR_SENDSIG) {
			if (uap->data > 0) {
				td2->td_flags &= ~TDF_INTERRUPT;
				mtx_unlock_spin(&sched_lock);
				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
			} else {
				mtx_unlock_spin(&sched_lock);
			}
		} else {
			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
			if (TD_CAN_UNBIND(td2))
				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
			if (uap->cmd == KSE_INTR_INTERRUPT)
				td2->td_intrval = EINTR;
			else
				td2->td_intrval = ERESTART;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
				sleepq_abort(td2);
			mtx_unlock_spin(&sched_lock);
		}
		PROC_UNLOCK(p);
		break;
	case KSE_INTR_SIGEXIT:
		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		PROC_LOCK(p);
		sigexit(td, (int)uap->data);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int    error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
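	/*
	 * Don't let the last remaining (non-exiting) upcall exit while
	 * other threads still exist in the group; they would be left
	 * with no upcall to report back to, so fail with EDEADLK.
	 */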
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_SA;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct timespec timeout;
	struct timeval tv;
	sigset_t sigset;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
	if (td->td_flags & TDF_SA)
		td->td_pflags |= TDP_UPCALLING;
	else {
		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
		if (ku->ku_mflags == -1) {
			PROC_LOCK(p);
			sigexit(td, SIGSEGV);
		}
	}
	PROC_LOCK(p);
	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
		/* UTS wants to wait for signal event */
		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
		p->p_flag &= ~P_SIGEVENT;
		sigset = p->p_siglist;
		PROC_UNLOCK(p);
		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
		    sizeof(sigset));
	} else {
		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
			kg->kg_upsleeps++;
			error = msleep(&kg->kg_completed, &p->p_mtx,
				PPAUSE|PCATCH, "kserel",
				(uap->timeout ? tvtohz(&tv) : 0));
			kg->kg_upsleeps--;
		}
		PROC_UNLOCK(p);
	}
	if (ku->ku_flags & KUF_DOUPCALL) {
		mtx_lock_spin(&sched_lock);
		ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_SA))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) &&
		           ((td2->td_wchan == &kg->kg_completed) ||
			    (td2->td_wchan == &p->p_siglist &&
			     (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
			sleepq_abort(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * With no new KSEGRP and on the first call: use the current KSE and
 * don't schedule an upcall.  In all other situations, allocate as many
 * new KSEs as allowed and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus, sa = 0, first = 0;
	struct thread *newtd;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't always have a cpu counter available. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (virtual_cpu != 0)
		ncpus = virtual_cpu;
	if (!(mbx.km_flags & KMF_BOUND))
		sa = TDF_SA;
	else
		ncpus = 1;
	PROC_LOCK(p);
	if (!(p->p_flag & P_SA)) {
		first = 1;
		p->p_flag |= P_SA;
	}
	PROC_UNLOCK(p);
	if (!sa && !uap->newgroup && !first)
		return (EINVAL);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race condition here, but it is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		sched_fork_ksegrp(kg, newkg);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
			return (EINVAL);
		newkg = kg;
	}

	/*
	 * Creating more upcalls than there are physical CPUs does not
	 * help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group.
		 *
		 * For a multiplexed group, create as many KSEs as there
		 * are physical cpus.  This increases concurrency even if
		 * userland is not MP-safe and can only run on a single
		 * CPU.  In an ideal world, every physical cpu should
		 * execute a thread.  If there are enough KSEs, threads in
		 * the kernel can be executed in parallel on different cpus
		 * at full speed; concurrency in the kernel shouldn't be
		 * restricted by the number of upcalls userland provides.
		 * Adding more upcall structures only increases concurrency
		 * in userland.
		 *
		 * For a bound thread group there is only one thread in the
		 * group, so we only create one KSE for it.  A thread in
		 * this kind of group will never schedule an upcall when
		 * blocked; this is intended to simulate pthread system
		 * scope threads.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			      ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			sched_fork_kse(td->td_kse, newke);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	PROC_LOCK(p);
	if (newkg->kg_numupcalls >= ncpus) {
		PROC_UNLOCK(p);
		upcall_free(newku);
		return (EPROCLIM);
	}
	if (first && sa) {
		SIGSETOR(p->p_siglist, td->td_siglist);
		SIGEMPTYSET(td->td_siglist);
		SIGFILLSET(td->td_sigmask);
		SIG_CANTMASK(td->td_sigmask);
	}
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	upcall_link(newku, newkg);
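	/*
	 * Convert the requested upcall quantum to clock ticks, with a
	 * floor of one tick (tick is microseconds per clock tick;
	 * km_quantum is assumed to be expressed in microseconds).
	 */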
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread; find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no threads yet, create an
		 * initial upcall thread to own it.
		 */
		newtd = thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread doesn't have an upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
			newtd = td;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			newtd = thread_schedule_upcall(td, newku);
		}
	}
	if (!sa) {
		newtd->td_mailbox = mbx.km_curthread;
		newtd->td_flags &= ~TDF_SA;
		if (newtd != td) {
			mtx_unlock_spin(&sched_lock);
			cpu_set_upcall_kse(newtd, newku);
			mtx_lock_spin(&sched_lock);
		}
	} else {
		newtd->td_flags |= TDF_SA;
	}
	if (newtd != td)
		setrunqueue(newtd);
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */
void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox, then add the mailbox
 * at the head of a list we are building in user space.  The list is
 * anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td, int willexit)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error = 0, temp, sig;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Export clock ticks spent in kernel mode. */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword32(addr) + td->td_usticks;
	if (suword32(addr, temp)) {
		error = EFAULT;
		goto bad;
	}

	/*
	 * Post a sync signal, or process SIGKILL and SIGSTOP.
	 * Posting a sync signal is only possible when the signal is not
	 * caught by userland or the process is being debugged.
	 */
	PROC_LOCK(p);
	if (td->td_flags & TDF_NEEDSIGCHK) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_NEEDSIGCHK;
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
	}
	if (willexit)
		SIGFILLSET(td->td_sigmask);
	PROC_UNLOCK(p);

	/* Get the address of the list pointer in the latest mailbox. */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
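	/*
	 * Loop until kg_completed is stable: suword() may sleep, so the
	 * list head can change between the userland store and the
	 * re-check under the proc lock below.
	 */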
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by other
			 * upcall threads when we unlock the process lock;
			 * it is no longer valid to use it anywhere else.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	sigexit(td, SIGILL);
	return (error);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on
 * this upcall's mailbox, as it is the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

	addr = (void *)(&ku->ku_mailbox->km_completed);
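	/* Same retry-until-stable pattern as in thread_export_context(). */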
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}
115244990b8cSJulian Elischer 
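/*
 * The for(;;) loops above (and the similar loop in the routine that
 * exports a completed context) follow an optimistic-retry idiom:
 * suword() may fault and sleep, so it cannot be issued with the proc
 * lock held; instead the list head is snapshotted, written out to
 * userland, and re-checked under the lock before being committed.
 * A minimal sketch of the idiom, with illustrative names only:
 */
#if 0
	for (;;) {
		snapshot = shared_head;			/* unlocked read */
		if (suword(uaddr, (long)snapshot))	/* may sleep/fault */
			return (EFAULT);
		PROC_LOCK(p);
		if (snapshot == shared_head) {		/* nobody raced us */
			shared_head = new_head;		/* commit */
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);				/* lost a race; retry */
	}
#endif
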
115344990b8cSJulian Elischer /*
11548798d4f9SDavid Xu  * This function should be called at statclock interrupt time
11558798d4f9SDavid Xu  */
11568798d4f9SDavid Xu int
11575215b187SJeff Roberson thread_statclock(int user)
11588798d4f9SDavid Xu {
11598798d4f9SDavid Xu 	struct thread *td = curthread;
1160cd4f6ebbSDavid Xu 	struct ksegrp *kg = td->td_ksegrp;
11618798d4f9SDavid Xu 
1162cd4f6ebbSDavid Xu 	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
1163cd4f6ebbSDavid Xu 		return (0);
11648798d4f9SDavid Xu 	if (user) {
11658798d4f9SDavid Xu 		/* Currently this is always done via ast(). */
1166b4508d7dSDavid Xu 		mtx_lock_spin(&sched_lock);
11674a338afdSJulian Elischer 		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
1168b4508d7dSDavid Xu 		mtx_unlock_spin(&sched_lock);
11695215b187SJeff Roberson 		td->td_uuticks++;
11708798d4f9SDavid Xu 	} else {
11718798d4f9SDavid Xu 		if (td->td_mailbox != NULL)
11725215b187SJeff Roberson 			td->td_usticks++;
11735215b187SJeff Roberson 		else {
11745215b187SJeff Roberson 			/* XXXKSE
11755215b187SJeff Roberson 			 * We will call thread_user_enter() for every
11765215b187SJeff Roberson 			 * kernel entry in the future, so if the thread
11775215b187SJeff Roberson 			 * mailbox is NULL, it must be a UTS kernel thread;
11785215b187SJeff Roberson 			 * don't account clock ticks for it.
11795215b187SJeff Roberson 			 */
11808798d4f9SDavid Xu 		}
11815215b187SJeff Roberson 	}
11825215b187SJeff Roberson 	return (0);
11838798d4f9SDavid Xu }
11848798d4f9SDavid Xu 
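/*
 * A hypothetical sketch of the expected caller: the statclock()
 * interrupt handler would forward its tick here for KSE processes,
 * roughly as below (the actual call site is outside this file):
 */
#if 0
	if (p->p_flag & P_SA)			/* threaded process? */
		thread_statclock(usermode);	/* usermode: tick hit in userland */
#endif
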
11855215b187SJeff Roberson /*
11864b4866edSDavid Xu  * Export stat clock ticks for userland.
11875215b187SJeff Roberson  */
11888798d4f9SDavid Xu static int
11894b4866edSDavid Xu thread_update_usr_ticks(struct thread *td, int user)
11908798d4f9SDavid Xu {
11918798d4f9SDavid Xu 	struct proc *p = td->td_proc;
11928798d4f9SDavid Xu 	struct kse_thr_mailbox *tmbx;
11935215b187SJeff Roberson 	struct kse_upcall *ku;
11946ce75196SDavid Xu 	struct ksegrp *kg;
11958798d4f9SDavid Xu 	caddr_t addr;
11968b149b51SJohn Baldwin 	u_int uticks;
11978798d4f9SDavid Xu 
11985215b187SJeff Roberson 	if ((ku = td->td_upcall) == NULL)
11995215b187SJeff Roberson 		return (-1);
12008798d4f9SDavid Xu 
12015215b187SJeff Roberson 	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
12028798d4f9SDavid Xu 	if ((tmbx == NULL) || (tmbx == (void *)-1))
12035215b187SJeff Roberson 		return (-1);
12044b4866edSDavid Xu 	if (user) {
12055215b187SJeff Roberson 		uticks = td->td_uuticks;
12065215b187SJeff Roberson 		td->td_uuticks = 0;
12075215b187SJeff Roberson 		addr = (caddr_t)&tmbx->tm_uticks;
12084b4866edSDavid Xu 	} else {
12094b4866edSDavid Xu 		uticks = td->td_usticks;
12105215b187SJeff Roberson 		td->td_usticks = 0;
12114b4866edSDavid Xu 		addr = (caddr_t)&tmbx->tm_sticks;
12124b4866edSDavid Xu 	}
12134b4866edSDavid Xu 	if (uticks) {
12144e4422d4SMarcel Moolenaar 		if (suword32(addr, uticks+fuword32(addr))) {
12155215b187SJeff Roberson 			PROC_LOCK(p);
12165215b187SJeff Roberson 			psignal(p, SIGSEGV);
12175215b187SJeff Roberson 			PROC_UNLOCK(p);
12185215b187SJeff Roberson 			return (-2);
12195215b187SJeff Roberson 		}
12204b4866edSDavid Xu 	}
12216ce75196SDavid Xu 	kg = td->td_ksegrp;
12226ce75196SDavid Xu 	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
12234b4866edSDavid Xu 		mtx_lock_spin(&sched_lock);
12244b4866edSDavid Xu 		td->td_upcall->ku_flags |= KUF_DOUPCALL;
12254b4866edSDavid Xu 		mtx_unlock_spin(&sched_lock);
12264b4866edSDavid Xu 	}
12275215b187SJeff Roberson 	return (0);
12288798d4f9SDavid Xu }
12298798d4f9SDavid Xu 
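/*
 * A hypothetical userland-side sketch (not kernel code): a UTS could
 * sample the per-thread tick totals the kernel accumulates into the
 * thread mailbox above; tm_uticks and tm_sticks are the fields this
 * function updates via suword32().
 */
#if 0
static u_int
uts_thread_ticks(struct kse_thr_mailbox *tmbx)
{
	/* Only the kernel advances these; unlocked reads may race. */
	return (tmbx->tm_uticks + tmbx->tm_sticks);
}
#endif
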
12308798d4f9SDavid Xu /*
123144990b8cSJulian Elischer  * Discard the current thread and exit from its context.
123244990b8cSJulian Elischer  *
123344990b8cSJulian Elischer  * Because we can't free a thread while we're operating under its context,
1234696058c3SJulian Elischer  * push the current thread into our CPU's deadthread holder. This means
1235696058c3SJulian Elischer  * we needn't worry about someone else grabbing our context before we
1236696058c3SJulian Elischer  * do a cpu_throw().
123744990b8cSJulian Elischer  */
123844990b8cSJulian Elischer void
123944990b8cSJulian Elischer thread_exit(void)
124044990b8cSJulian Elischer {
124144990b8cSJulian Elischer 	struct thread *td;
124244990b8cSJulian Elischer 	struct kse *ke;
124344990b8cSJulian Elischer 	struct proc *p;
124444990b8cSJulian Elischer 	struct ksegrp	*kg;
124544990b8cSJulian Elischer 
124644990b8cSJulian Elischer 	td = curthread;
124744990b8cSJulian Elischer 	kg = td->td_ksegrp;
124844990b8cSJulian Elischer 	p = td->td_proc;
124944990b8cSJulian Elischer 	ke = td->td_kse;
125044990b8cSJulian Elischer 
125144990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
125288151aa3SJulian Elischer 	KASSERT(p != NULL, ("thread exiting without a process"));
125388151aa3SJulian Elischer 	KASSERT(ke != NULL, ("thread exiting without a kse"));
125488151aa3SJulian Elischer 	KASSERT(kg != NULL, ("thread exiting without a kse group"));
125544990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
125644990b8cSJulian Elischer 	CTR1(KTR_PROC, "thread_exit: thread %p", td);
125762a0fd94SJohn Baldwin 	mtx_assert(&Giant, MA_NOTOWNED);
125844990b8cSJulian Elischer 
125948bfcdddSJulian Elischer 	if (td->td_standin != NULL) {
126048bfcdddSJulian Elischer 		thread_stash(td->td_standin);
126148bfcdddSJulian Elischer 		td->td_standin = NULL;
126248bfcdddSJulian Elischer 	}
126348bfcdddSJulian Elischer 
126444990b8cSJulian Elischer 	cpu_thread_exit(td);	/* XXXSMP */
126544990b8cSJulian Elischer 
12661faf202eSJulian Elischer 	/*
12671faf202eSJulian Elischer 	 * The last thread is left attached to the process
12681faf202eSJulian Elischer 	 * so that the whole bundle gets recycled. Skip
12691faf202eSJulian Elischer 	 * all this stuff.
12701faf202eSJulian Elischer 	 */
12711faf202eSJulian Elischer 	if (p->p_numthreads > 1) {
1272d3a0bd78SJulian Elischer 		thread_unlink(td);
12730252d203SDavid Xu 		if (p->p_maxthrwaits)
12740252d203SDavid Xu 			wakeup(&p->p_numthreads);
127544990b8cSJulian Elischer 		/*
127644990b8cSJulian Elischer 		 * The test below is NOT true if we are the
12771faf202eSJulian Elischer 		 * sole exiting thread. P_STOPPED_SINGLE is unset
127844990b8cSJulian Elischer 		 * in exit1() once it is the only survivor.
127944990b8cSJulian Elischer 		 */
12801279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
128144990b8cSJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
128271fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
128344990b8cSJulian Elischer 			}
128444990b8cSJulian Elischer 		}
128548bfcdddSJulian Elischer 
12865215b187SJeff Roberson 		/*
12875215b187SJeff Roberson 		 * Because each upcall structure has an owner thread,
12885215b187SJeff Roberson 		 * and the owner thread exits only when the process is
12895215b187SJeff Roberson 		 * exiting, upcalls to userland are no longer needed and
12905215b187SJeff Roberson 		 * deleting the upcall structure is safe here.
12915215b187SJeff Roberson 		 * Thus, once all threads in a group have exited, all
12925215b187SJeff Roberson 		 * upcalls in the group are automatically freed.
12935215b187SJeff Roberson 		 */
12945215b187SJeff Roberson 		if (td->td_upcall)
12955215b187SJeff Roberson 			upcall_remove(td);
12966f8132a8SJulian Elischer 
1297ab2baa72SDavid Xu 		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
1298ab2baa72SDavid Xu 		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
12995215b187SJeff Roberson 		ke->ke_state = KES_UNQUEUED;
13005215b187SJeff Roberson 		ke->ke_thread = NULL;
130148bfcdddSJulian Elischer 		/*
130293a7aa79SJulian Elischer 		 * Decide what to do with the KSE attached to this thread.
130348bfcdddSJulian Elischer 		 */
1304ab2baa72SDavid Xu 		if (ke->ke_flags & KEF_EXIT) {
13056f8132a8SJulian Elischer 			kse_unlink(ke);
1306ab2baa72SDavid Xu 			if (kg->kg_kses == 0) {
1307ab2baa72SDavid Xu 				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
1308ab2baa72SDavid Xu 				ksegrp_unlink(kg);
1309ab2baa72SDavid Xu 			}
1310ab2baa72SDavid Xu 		}
13115215b187SJeff Roberson 		else
13126f8132a8SJulian Elischer 			kse_reassign(ke);
13136f8132a8SJulian Elischer 		PROC_UNLOCK(p);
13145215b187SJeff Roberson 		td->td_kse	= NULL;
13155c8329edSJulian Elischer 		td->td_state	= TDS_INACTIVE;
131636f7b36fSDavid Xu #if 0
13175c8329edSJulian Elischer 		td->td_proc	= NULL;
131836f7b36fSDavid Xu #endif
13195c8329edSJulian Elischer 		td->td_ksegrp	= NULL;
13205c8329edSJulian Elischer 		td->td_last_kse	= NULL;
1321696058c3SJulian Elischer 		PCPU_SET(deadthread, td);
13221faf202eSJulian Elischer 	} else {
13231faf202eSJulian Elischer 		PROC_UNLOCK(p);
13241faf202eSJulian Elischer 	}
13254093529dSJeff Roberson 	/* XXX Shouldn't cpu_throw() here. */
1326cc66ebe2SPeter Wemm 	mtx_assert(&sched_lock, MA_OWNED);
1327cc66ebe2SPeter Wemm 	cpu_throw(td, choosethread());
1328cc66ebe2SPeter Wemm 	panic("I'm a teapot!");
132944990b8cSJulian Elischer 	/* NOTREACHED */
133044990b8cSJulian Elischer }
133144990b8cSJulian Elischer 
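/*
 * A hedged sketch of what the deadthread hand-off above buys us: a
 * later thread on this CPU may reclaim the zombie once its stack and
 * context are guaranteed unused. The real reclamation is done by
 * thread_reap(), defined earlier in this file; illustratively:
 */
#if 0
	if ((td = PCPU_GET(deadthread)) != NULL) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);	/* queue the zombie for reaping */
	}
#endif
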
133244990b8cSJulian Elischer /*
1333696058c3SJulian Elischer  * Do any thread-specific cleanups that may be needed in wait().
133437814395SPeter Wemm  * Called with Giant, the proc lock and sched_lock not held.
1335696058c3SJulian Elischer  */
1336696058c3SJulian Elischer void
1337696058c3SJulian Elischer thread_wait(struct proc *p)
1338696058c3SJulian Elischer {
1339696058c3SJulian Elischer 	struct thread *td;
1340696058c3SJulian Elischer 
134137814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
134285495c72SJens Schweikhardt 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
134385495c72SJens Schweikhardt 	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1344696058c3SJulian Elischer 	FOREACH_THREAD_IN_PROC(p, td) {
1345696058c3SJulian Elischer 		if (td->td_standin != NULL) {
1346696058c3SJulian Elischer 			thread_free(td->td_standin);
1347696058c3SJulian Elischer 			td->td_standin = NULL;
1348696058c3SJulian Elischer 		}
1349696058c3SJulian Elischer 		cpu_thread_clean(td);
1350696058c3SJulian Elischer 	}
1351696058c3SJulian Elischer 	thread_reap();	/* check for zombie threads etc. */
1352696058c3SJulian Elischer }
1353696058c3SJulian Elischer 
1354696058c3SJulian Elischer /*
135544990b8cSJulian Elischer  * Link a thread to a process.
13561faf202eSJulian Elischer  * Set up anything that needs to be initialized for it to
13571faf202eSJulian Elischer  * be used by the process.
135844990b8cSJulian Elischer  *
135944990b8cSJulian Elischer  * Note that we do not link to the proc's ucred here.
136044990b8cSJulian Elischer  * The thread is linked as if running, but with no KSE assigned.
136144990b8cSJulian Elischer  */
136244990b8cSJulian Elischer void
136344990b8cSJulian Elischer thread_link(struct thread *td, struct ksegrp *kg)
136444990b8cSJulian Elischer {
136544990b8cSJulian Elischer 	struct proc *p;
136644990b8cSJulian Elischer 
136744990b8cSJulian Elischer 	p = kg->kg_proc;
136871fad9fdSJulian Elischer 	td->td_state    = TDS_INACTIVE;
136944990b8cSJulian Elischer 	td->td_proc     = p;
137044990b8cSJulian Elischer 	td->td_ksegrp   = kg;
137144990b8cSJulian Elischer 	td->td_last_kse = NULL;
13725215b187SJeff Roberson 	td->td_flags    = 0;
13735215b187SJeff Roberson 	td->td_kse      = NULL;
137444990b8cSJulian Elischer 
13751faf202eSJulian Elischer 	LIST_INIT(&td->td_contested);
1376c06eb4e2SSam Leffler 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
137744990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
137844990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
137944990b8cSJulian Elischer 	p->p_numthreads++;
138044990b8cSJulian Elischer 	kg->kg_numthreads++;
138144990b8cSJulian Elischer }
138244990b8cSJulian Elischer 
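/*
 * A hypothetical caller sketch: a freshly allocated thread is attached
 * to a ksegrp with thread_link(); hooking up a ucred and a KSE is the
 * caller's job, per the note above. Names here are illustrative:
 */
#if 0
	struct thread *newtd;

	newtd = thread_alloc();
	/* ... initialize the new thread's context, ucred, etc. ... */
	thread_link(newtd, kg);		/* now on p_threads and kg_threads */
#endif
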
1383d3a0bd78SJulian Elischer void
1384d3a0bd78SJulian Elischer thread_unlink(struct thread *td)
1385d3a0bd78SJulian Elischer {
1386d3a0bd78SJulian Elischer 	struct proc *p = td->td_proc;
1387d3a0bd78SJulian Elischer 	struct ksegrp *kg = td->td_ksegrp;
1388d3a0bd78SJulian Elischer 
1389112afcb2SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
1390d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1391d3a0bd78SJulian Elischer 	p->p_numthreads--;
1392d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1393d3a0bd78SJulian Elischer 	kg->kg_numthreads--;
1394d3a0bd78SJulian Elischer 	/* could clear a few other things here */
1395d3a0bd78SJulian Elischer }
1396d3a0bd78SJulian Elischer 
13975215b187SJeff Roberson /*
13985215b187SJeff Roberson  * Purge a ksegrp resource. When a ksegrp is preparing to
13995215b187SJeff Roberson  * exit, it calls this function.
14005215b187SJeff Roberson  */
1401a6f37ac9SJohn Baldwin static void
14025215b187SJeff Roberson kse_purge_group(struct thread *td)
14035215b187SJeff Roberson {
14045215b187SJeff Roberson 	struct ksegrp *kg;
14055215b187SJeff Roberson 	struct kse *ke;
14065215b187SJeff Roberson 
14075215b187SJeff Roberson 	kg = td->td_ksegrp;
14085215b187SJeff Roberson  	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
14095215b187SJeff Roberson 	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
14105215b187SJeff Roberson 		KASSERT(ke->ke_state == KES_IDLE,
14115215b187SJeff Roberson 			("%s: wrong idle KSE state", __func__));
14125215b187SJeff Roberson 		kse_unlink(ke);
14135215b187SJeff Roberson 	}
14145215b187SJeff Roberson 	KASSERT((kg->kg_kses == 1),
14155215b187SJeff Roberson 		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
14165215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0),
14175215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall datas",
14185215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall structures",
14195215b187SJeff Roberson }
14205215b187SJeff Roberson 
14215215b187SJeff Roberson /*
14225215b187SJeff Roberson  * Purge a process's KSE resource. When a process is preparing to
14235215b187SJeff Roberson  * exit, it calls kse_purge to release any extra KSE resources in
14245215b187SJeff Roberson  * the process.
14255215b187SJeff Roberson  */
1426a6f37ac9SJohn Baldwin static void
14275c8329edSJulian Elischer kse_purge(struct proc *p, struct thread *td)
14285c8329edSJulian Elischer {
14295c8329edSJulian Elischer 	struct ksegrp *kg;
14305215b187SJeff Roberson 	struct kse *ke;
14315c8329edSJulian Elischer 
14325c8329edSJulian Elischer  	KASSERT(p->p_numthreads == 1, ("bad thread number"));
14335c8329edSJulian Elischer 	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
14345c8329edSJulian Elischer 		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
14355c8329edSJulian Elischer 		p->p_numksegrps--;
14365215b187SJeff Roberson 		/*
14375215b187SJeff Roberson 		 * There is no ownership for a KSE; after all threads
14385215b187SJeff Roberson 		 * in the group have exited, some KSEs may have been
14395215b187SJeff Roberson 		 * left on the idle queue, so garbage-collect them now.
14405215b187SJeff Roberson 		 */
14415215b187SJeff Roberson 		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
14425215b187SJeff Roberson 			KASSERT(ke->ke_state == KES_IDLE,
14435215b187SJeff Roberson 			   ("%s: wrong idle KSE state", __func__));
14445215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
14455215b187SJeff Roberson 			kg->kg_idle_kses--;
14465215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
14475215b187SJeff Roberson 			kg->kg_kses--;
14485215b187SJeff Roberson 			kse_stash(ke);
14495215b187SJeff Roberson 		}
14505c8329edSJulian Elischer 		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
14515c8329edSJulian Elischer 		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
14525215b187SJeff Roberson 		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
14535215b187SJeff Roberson 		KASSERT((kg->kg_numupcalls == 0),
14545215b187SJeff Roberson 		        ("%s: ksegrp still has %d upcall structures",
14555215b187SJeff Roberson 			__func__, kg->kg_numupcalls));
14565215b187SJeff Roberson 
14575215b187SJeff Roberson 		if (kg != td->td_ksegrp)
14585c8329edSJulian Elischer 			ksegrp_stash(kg);
14595c8329edSJulian Elischer 	}
14605c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
14615c8329edSJulian Elischer 	p->p_numksegrps++;
14625c8329edSJulian Elischer }
14635c8329edSJulian Elischer 
14645215b187SJeff Roberson /*
14655215b187SJeff Roberson  * This function is intended to be used to initialize a spare thread
14665215b187SJeff Roberson  * for an upcall. The thread's large data area is initialized outside
14675215b187SJeff Roberson  * sched_lock on behalf of thread_schedule_upcall().
14685215b187SJeff Roberson  */
14695215b187SJeff Roberson void
14705215b187SJeff Roberson thread_alloc_spare(struct thread *td, struct thread *spare)
14715215b187SJeff Roberson {
147237814395SPeter Wemm 
14735215b187SJeff Roberson 	if (td->td_standin)
14745215b187SJeff Roberson 		return;
14755215b187SJeff Roberson 	if (spare == NULL)
14765215b187SJeff Roberson 		spare = thread_alloc();
14775215b187SJeff Roberson 	td->td_standin = spare;
14785215b187SJeff Roberson 	bzero(&spare->td_startzero,
14795215b187SJeff Roberson 	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
14805215b187SJeff Roberson 	spare->td_proc = td->td_proc;
14815215b187SJeff Roberson 	spare->td_ucred = crhold(td->td_ucred);
14825215b187SJeff Roberson }
14835c8329edSJulian Elischer 
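/*
 * RANGEOF(), used by the bzero() above and by the bcopy() in
 * thread_schedule_upcall(), is presumably plain offsetof() arithmetic
 * over the td_startzero..td_endzero and td_startcopy..td_endcopy
 * marker fields of struct thread, along the lines of:
 */
#if 0
#define	RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
#endif
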
148444990b8cSJulian Elischer /*
1485c76e33b6SJonathan Mini  * Create a thread and schedule it for upcall on the KSE given.
148693a7aa79SJulian Elischer  * Use our thread's standin so that we don't have to allocate one.
148744990b8cSJulian Elischer  */
148844990b8cSJulian Elischer struct thread *
14895215b187SJeff Roberson thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
149044990b8cSJulian Elischer {
149144990b8cSJulian Elischer 	struct thread *td2;
149244990b8cSJulian Elischer 
149344990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
149448bfcdddSJulian Elischer 
149548bfcdddSJulian Elischer 	/*
14965215b187SJeff Roberson 	 * Schedule an upcall thread on the specified kse_upcall;
14975215b187SJeff Roberson 	 * the kse_upcall must be free and td must have a spare
14985215b187SJeff Roberson 	 * thread.
149948bfcdddSJulian Elischer 	 */
15005215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
150148bfcdddSJulian Elischer 	if ((td2 = td->td_standin) != NULL) {
150248bfcdddSJulian Elischer 		td->td_standin = NULL;
150344990b8cSJulian Elischer 	} else {
15045215b187SJeff Roberson 		panic("no reserve thread when scheduling an upcall");
150548bfcdddSJulian Elischer 		return (NULL);
150644990b8cSJulian Elischer 	}
150744990b8cSJulian Elischer 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
150848bfcdddSJulian Elischer 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
15091faf202eSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
15101faf202eSJulian Elischer 	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
15115215b187SJeff Roberson 	thread_link(td2, ku->ku_ksegrp);
151236f7b36fSDavid Xu 	/* inherit blocked thread's context */
151311e0f8e1SMarcel Moolenaar 	cpu_set_upcall(td2, td);
15145215b187SJeff Roberson 	/* Let the new thread become owner of the upcall */
15155215b187SJeff Roberson 	ku->ku_owner   = td2;
15165215b187SJeff Roberson 	td2->td_upcall = ku;
1517cd4f6ebbSDavid Xu 	td2->td_flags  = TDF_SA;
15181d5a24beSDavid Xu 	td2->td_pflags = TDP_UPCALLING;
15195215b187SJeff Roberson 	td2->td_kse    = NULL;
152048bfcdddSJulian Elischer 	td2->td_state  = TDS_CAN_RUN;
152148bfcdddSJulian Elischer 	td2->td_inhibitors = 0;
15229dde3bc9SDavid Xu 	SIGFILLSET(td2->td_sigmask);
15239dde3bc9SDavid Xu 	SIG_CANTMASK(td2->td_sigmask);
1524ab2baa72SDavid Xu 	sched_fork_thread(td, td2);
152548bfcdddSJulian Elischer 	return (td2);	/* bogus.. should be a void function */
152644990b8cSJulian Elischer }
152744990b8cSJulian Elischer 
15289dde3bc9SDavid Xu /*
15299dde3bc9SDavid Xu  * This is only used when a thread has generated a trap and the
15309dde3bc9SDavid Xu  * process is being debugged.
15319dde3bc9SDavid Xu  */
153258a3c273SJeff Roberson void
153358a3c273SJeff Roberson thread_signal_add(struct thread *td, int sig)
1534c76e33b6SJonathan Mini {
153558a3c273SJeff Roberson 	struct proc *p;
15369dde3bc9SDavid Xu 	siginfo_t siginfo;
15379dde3bc9SDavid Xu 	struct sigacts *ps;
1538c76e33b6SJonathan Mini 	int error;
1539c76e33b6SJonathan Mini 
1540b0bd5f38SDavid Xu 	p = td->td_proc;
1541b0bd5f38SDavid Xu 	PROC_LOCK_ASSERT(p, MA_OWNED);
15429dde3bc9SDavid Xu 	ps = p->p_sigacts;
15439dde3bc9SDavid Xu 	mtx_assert(&ps->ps_mtx, MA_OWNED);
15449dde3bc9SDavid Xu 
15454b7d5d84SDavid Xu 	cpu_thread_siginfo(sig, 0, &siginfo);
15469dde3bc9SDavid Xu 	mtx_unlock(&ps->ps_mtx);
1547c76e33b6SJonathan Mini 	PROC_UNLOCK(p);
15489dde3bc9SDavid Xu 	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
15499dde3bc9SDavid Xu 	if (error) {
155058a3c273SJeff Roberson 		PROC_LOCK(p);
155158a3c273SJeff Roberson 		sigexit(td, SIGILL);
155258a3c273SJeff Roberson 	}
15539dde3bc9SDavid Xu 	PROC_LOCK(p);
15549dde3bc9SDavid Xu 	SIGADDSET(td->td_sigmask, sig);
15559dde3bc9SDavid Xu 	mtx_lock(&ps->ps_mtx);
1556c76e33b6SJonathan Mini }
1557c76e33b6SJonathan Mini 
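/*
 * A hypothetical userland-side sketch (not kernel code): once the
 * copyout() above has placed the trap's siginfo into tm_syncsig, the
 * UTS can pick it up from the thread mailbox, e.g.:
 */
#if 0
static int
uts_fetch_trap_signal(struct kse_thr_mailbox *tmbx, siginfo_t *si)
{
	*si = tmbx->tm_syncsig;		/* filled in by the kernel */
	return (si->si_signo != 0);	/* non-zero if a signal is pending */
}
#endif
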
15586ce75196SDavid Xu void
15596ce75196SDavid Xu thread_switchout(struct thread *td)
15606ce75196SDavid Xu {
15616ce75196SDavid Xu 	struct kse_upcall *ku;
1562ab78d4d6SDavid Xu 	struct thread *td2;
15636ce75196SDavid Xu 
15646ce75196SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
15656ce75196SDavid Xu 
15666ce75196SDavid Xu 	/*
15676ce75196SDavid Xu 	 * If the outgoing thread is in a threaded group and has never
15686ce75196SDavid Xu 	 * scheduled an upcall, decide whether this is a short-
15696ce75196SDavid Xu 	 * or long-term event and thus whether or not to schedule
15706ce75196SDavid Xu 	 * an upcall.
15716ce75196SDavid Xu 	 * If it is a short-term event, just suspend it in
15726ce75196SDavid Xu 	 * a way that takes its KSE with it.
15736ce75196SDavid Xu 	 * Select the events for which we want to schedule upcalls.
15746ce75196SDavid Xu 	 * For now it's just sleep.
15756ce75196SDavid Xu 	 * XXXKSE eventually almost any inhibition could do.
15766ce75196SDavid Xu 	 */
15776ce75196SDavid Xu 	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
15786ce75196SDavid Xu 		/*
15796ce75196SDavid Xu 		 * Release ownership of the upcall and schedule an
15806ce75196SDavid Xu 		 * upcall thread; this new upcall thread becomes the
15816ce75196SDavid Xu 		 * owner of the upcall structure.
15826ce75196SDavid Xu 		 */
15836ce75196SDavid Xu 		ku = td->td_upcall;
15846ce75196SDavid Xu 		ku->ku_owner = NULL;
15856ce75196SDavid Xu 		td->td_upcall = NULL;
15866ce75196SDavid Xu 		td->td_flags &= ~TDF_CAN_UNBIND;
1587ab78d4d6SDavid Xu 		td2 = thread_schedule_upcall(td, ku);
1588ab78d4d6SDavid Xu 		setrunqueue(td2);
15896ce75196SDavid Xu 	}
15906ce75196SDavid Xu }
15916ce75196SDavid Xu 
1592c76e33b6SJonathan Mini /*
15935215b187SJeff Roberson  * Setup done on the thread when it enters the kernel.
15941434d3feSJulian Elischer  * XXXKSE Presently only for syscalls but eventually all kernel entries.
15951434d3feSJulian Elischer  */
15961434d3feSJulian Elischer void
15971434d3feSJulian Elischer thread_user_enter(struct proc *p, struct thread *td)
15981434d3feSJulian Elischer {
15995215b187SJeff Roberson 	struct ksegrp *kg;
16005215b187SJeff Roberson 	struct kse_upcall *ku;
16011ecb38a3SDavid Xu 	struct kse_thr_mailbox *tmbx;
1602d3b5e418SDavid Xu 	uint32_t tflags;
16031434d3feSJulian Elischer 
16045215b187SJeff Roberson 	kg = td->td_ksegrp;
16051ecb38a3SDavid Xu 
16061434d3feSJulian Elischer 	/*
16071434d3feSJulian Elischer 	 * First check that we shouldn't just abort.
16081434d3feSJulian Elischer 	 * But check if we are the single thread first!
16091434d3feSJulian Elischer 	 */
1610cd4f6ebbSDavid Xu 	if (p->p_flag & P_SINGLE_EXIT) {
16111434d3feSJulian Elischer 		PROC_LOCK(p);
16121434d3feSJulian Elischer 		mtx_lock_spin(&sched_lock);
1613e574e444SDavid Xu 		thread_stopped(p);
16141434d3feSJulian Elischer 		thread_exit();
16151434d3feSJulian Elischer 		/* NOTREACHED */
16161434d3feSJulian Elischer 	}
16171434d3feSJulian Elischer 
16181434d3feSJulian Elischer 	/*
16191434d3feSJulian Elischer 	 * If we are doing a syscall in a KSE environment,
16201434d3feSJulian Elischer 	 * note where our mailbox is. There is always the
162193a7aa79SJulian Elischer 	 * possibility that we could do this lazily (in kse_reassign()),
16221434d3feSJulian Elischer 	 * but for now do it every time.
16231434d3feSJulian Elischer 	 */
16245215b187SJeff Roberson 	kg = td->td_ksegrp;
1625cd4f6ebbSDavid Xu 	if (td->td_flags & TDF_SA) {
16265215b187SJeff Roberson 		ku = td->td_upcall;
16275215b187SJeff Roberson 		KASSERT(ku, ("%s: no upcall owned", __func__));
16285215b187SJeff Roberson 		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
16291ecb38a3SDavid Xu 		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
16304e4422d4SMarcel Moolenaar 		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
16311ecb38a3SDavid Xu 		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
1632d3b5e418SDavid Xu 		if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
1633d3b5e418SDavid Xu 		    (ku->ku_mflags & KMF_NOUPCALL)) {
1634d3b5e418SDavid Xu 			td->td_mailbox = NULL;
1635d3b5e418SDavid Xu 		} else {
1636d3b5e418SDavid Xu 			if (td->td_standin == NULL)
1637d3b5e418SDavid Xu 				thread_alloc_spare(td, NULL);
1638ab908f59SDaniel Eischen 			tflags = fuword32(&tmbx->tm_flags);
1639d3b5e418SDavid Xu 			/*
1640d3b5e418SDavid Xu 			 * On some architectures, the TP register points to the
1641d3b5e418SDavid Xu 			 * thread mailbox but not to the kse mailbox, so userland
1642d3b5e418SDavid Xu 			 * cannot atomically clear km_curthread; it can, however,
1643d3b5e418SDavid Xu 			 * use the TP register and set TMF_NOUPCALL in the thread
1644d3b5e418SDavid Xu 			 * flags to indicate a critical region.
1645d3b5e418SDavid Xu 			 */
1646d3b5e418SDavid Xu 			if (tflags & TMF_NOUPCALL) {
16475215b187SJeff Roberson 				td->td_mailbox = NULL;
16488798d4f9SDavid Xu 			} else {
16491ecb38a3SDavid Xu 				td->td_mailbox = tmbx;
16508798d4f9SDavid Xu 				mtx_lock_spin(&sched_lock);
165193a7aa79SJulian Elischer 				td->td_flags |= TDF_CAN_UNBIND;
16528798d4f9SDavid Xu 				mtx_unlock_spin(&sched_lock);
16535215b187SJeff Roberson 			}
16541434d3feSJulian Elischer 		}
16551434d3feSJulian Elischer 	}
1656d3b5e418SDavid Xu }
16571434d3feSJulian Elischer 
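/*
 * A hypothetical UTS-side sketch (not kernel code) of the critical
 * region protocol described above, for architectures that cannot
 * atomically clear km_curthread: the UTS toggles TMF_NOUPCALL in its
 * own thread mailbox, which the kernel samples via fuword32() above.
 */
#if 0
static void
uts_enter_critical(struct kse_thr_mailbox *tmbx)
{
	tmbx->tm_flags |= TMF_NOUPCALL;		/* kernel skips the unbind */
}

static void
uts_leave_critical(struct kse_thr_mailbox *tmbx)
{
	tmbx->tm_flags &= ~TMF_NOUPCALL;
}
#endif
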
16581434d3feSJulian Elischer /*
1659c76e33b6SJonathan Mini  * The extra work we go through if we are a threaded process when we
1660c76e33b6SJonathan Mini  * return to userland.
1661c76e33b6SJonathan Mini  *
1662c76e33b6SJonathan Mini  * If we are a KSE process and returning to user mode, check for
1663c76e33b6SJonathan Mini  * extra work to do before we return (e.g. for more syscalls
1664c76e33b6SJonathan Mini  * to complete first).  If we were in a critical section, we should
1665c76e33b6SJonathan Mini  * just return to let it finish. Same if we were in the UTS (in
1666c76e33b6SJonathan Mini  * which case the mailbox's context's busy indicator will be set).
1667c76e33b6SJonathan Mini  * The only traps we support will have set the mailbox.
1668c76e33b6SJonathan Mini  * We will clear it here.
166944990b8cSJulian Elischer  */
1670c76e33b6SJonathan Mini int
1671253fdd5bSJulian Elischer thread_userret(struct thread *td, struct trapframe *frame)
1672c76e33b6SJonathan Mini {
16731ecb38a3SDavid Xu 	int error = 0, upcalls, uts_crit;
16745215b187SJeff Roberson 	struct kse_upcall *ku;
16750252d203SDavid Xu 	struct ksegrp *kg, *kg2;
167648bfcdddSJulian Elischer 	struct proc *p;
1677bfd83250SDavid Xu 	struct timespec ts;
1678c76e33b6SJonathan Mini 
16796f8132a8SJulian Elischer 	p = td->td_proc;
16805215b187SJeff Roberson 	kg = td->td_ksegrp;
1681cd4f6ebbSDavid Xu 	ku = td->td_upcall;
168293a7aa79SJulian Elischer 
1683cd4f6ebbSDavid Xu 	/* Nothing to do for a bound thread. */
1684cd4f6ebbSDavid Xu 	if (!(td->td_flags & TDF_SA))
16855215b187SJeff Roberson 		return (0);
16865215b187SJeff Roberson 
16875215b187SJeff Roberson 	/*
16885215b187SJeff Roberson 	 * A stat clock interrupt hit in userland and we are
16895215b187SJeff Roberson 	 * returning from the interrupt; charge the thread's
16905215b187SJeff Roberson 	 * userland time to the UTS.
16915215b187SJeff Roberson 	 */
16925215b187SJeff Roberson 	if (td->td_flags & TDF_USTATCLOCK) {
16934b4866edSDavid Xu 		thread_update_usr_ticks(td, 1);
169493a7aa79SJulian Elischer 		mtx_lock_spin(&sched_lock);
16955215b187SJeff Roberson 		td->td_flags &= ~TDF_USTATCLOCK;
16960dbb100bSDavid Xu 		mtx_unlock_spin(&sched_lock);
16974b4866edSDavid Xu 		if (kg->kg_completed ||
16984b4866edSDavid Xu 		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
16994b4866edSDavid Xu 			thread_user_enter(p, td);
17005215b187SJeff Roberson 	}
17015215b187SJeff Roberson 
17021ecb38a3SDavid Xu 	uts_crit = (td->td_mailbox == NULL);
17035215b187SJeff Roberson 	/*
17045215b187SJeff Roberson 	 * Optimisation:
17055215b187SJeff Roberson 	 * This thread has not started any upcall.
17065215b187SJeff Roberson 	 * If there is no work to report other than ourselves,
17075215b187SJeff Roberson 	 * then it can return directly to userland.
17085215b187SJeff Roberson 	 */
17095215b187SJeff Roberson 	if (TD_CAN_UNBIND(td)) {
17105215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
17115215b187SJeff Roberson 		td->td_flags &= ~TDF_CAN_UNBIND;
17124093529dSJeff Roberson 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
171321e0492aSDavid Xu 		    (kg->kg_completed == NULL) &&
17146ce75196SDavid Xu 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
171595bee4c3SDavid Xu 		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
17164093529dSJeff Roberson 			mtx_unlock_spin(&sched_lock);
17174b4866edSDavid Xu 			thread_update_usr_ticks(td, 0);
17189a4b78c9SDavid Xu 			nanotime(&ts);
17199a4b78c9SDavid Xu 			error = copyout(&ts,
17209a4b78c9SDavid Xu 				(caddr_t)&ku->ku_mailbox->km_timeofday,
17219a4b78c9SDavid Xu 				sizeof(ts));
172221e0492aSDavid Xu 			td->td_mailbox = NULL;
17231ecb38a3SDavid Xu 			ku->ku_mflags = 0;
17249a4b78c9SDavid Xu 			if (error)
17259a4b78c9SDavid Xu 				goto out;
172693a7aa79SJulian Elischer 			return (0);
172793a7aa79SJulian Elischer 		}
17284093529dSJeff Roberson 		mtx_unlock_spin(&sched_lock);
1729dd7da9aaSDavid Xu 		thread_export_context(td, 0);
173093a7aa79SJulian Elischer 		/*
17315215b187SJeff Roberson 		 * There is something to report, and we own an upcall
17325215b187SJeff Roberson 		 * structure, so we can go to userland.
17335215b187SJeff Roberson 		 * Turn ourselves into an upcall thread.
173493a7aa79SJulian Elischer 		 */
17351d5a24beSDavid Xu 		td->td_pflags |= TDP_UPCALLING;
17361ecb38a3SDavid Xu 	} else if (td->td_mailbox && (ku == NULL)) {
1737dd7da9aaSDavid Xu 		thread_export_context(td, 1);
1738e574e444SDavid Xu 		PROC_LOCK(p);
17396f8132a8SJulian Elischer 		/*
17405215b187SJeff Roberson 		 * There are upcall threads waiting for
17415215b187SJeff Roberson 		 * work to do, wake one of them up.
17425215b187SJeff Roberson 		 * XXXKSE Maybe wake all of them up.
17436f8132a8SJulian Elischer 		 */
1744dd7da9aaSDavid Xu 		if (kg->kg_upsleeps)
17455215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
1746e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1747e574e444SDavid Xu 		thread_stopped(p);
174893a7aa79SJulian Elischer 		thread_exit();
17495215b187SJeff Roberson 		/* NOTREACHED */
175048bfcdddSJulian Elischer 	}
175193a7aa79SJulian Elischer 
1752cd4f6ebbSDavid Xu 	KASSERT(ku != NULL, ("upcall is NULL\n"));
1753a87891eeSDavid Xu 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1754a87891eeSDavid Xu 
1755a87891eeSDavid Xu 	if (p->p_numthreads > max_threads_per_proc) {
1756a87891eeSDavid Xu 		max_threads_hits++;
1757a87891eeSDavid Xu 		PROC_LOCK(p);
1758112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
17597677ce18SDavid Xu 		p->p_maxthrwaits++;
1760a87891eeSDavid Xu 		while (p->p_numthreads > max_threads_per_proc) {
1761a87891eeSDavid Xu 			upcalls = 0;
1762a87891eeSDavid Xu 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1763a87891eeSDavid Xu 				if (kg2->kg_numupcalls == 0)
1764a87891eeSDavid Xu 					upcalls++;
1765a87891eeSDavid Xu 				else
1766a87891eeSDavid Xu 					upcalls += kg2->kg_numupcalls;
1767a87891eeSDavid Xu 			}
1768a87891eeSDavid Xu 			if (upcalls >= max_threads_per_proc)
1769a87891eeSDavid Xu 				break;
17705073e68fSDavid Xu 			mtx_unlock_spin(&sched_lock);
177136407becSDavid Xu 			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
177255cdddc0SPeter Wemm 			    "maxthreads", 0)) {
17737677ce18SDavid Xu 				mtx_lock_spin(&sched_lock);
177436407becSDavid Xu 				break;
17757677ce18SDavid Xu 			} else {
1776112afcb2SJohn Baldwin 				mtx_lock_spin(&sched_lock);
1777a87891eeSDavid Xu 			}
17787677ce18SDavid Xu 		}
17797677ce18SDavid Xu 		p->p_maxthrwaits--;
1780112afcb2SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
1781a87891eeSDavid Xu 		PROC_UNLOCK(p);
1782a87891eeSDavid Xu 	}
1783a87891eeSDavid Xu 
17841d5a24beSDavid Xu 	if (td->td_pflags & TDP_UPCALLING) {
17851ecb38a3SDavid Xu 		uts_crit = 0;
17866ce75196SDavid Xu 		kg->kg_nextupcall = ticks+kg->kg_upquantum;
178748bfcdddSJulian Elischer 		/*
178844990b8cSJulian Elischer 		 * There is no more work to do and we are going to ride
17895215b187SJeff Roberson 		 * this thread up to userland as an upcall.
179048bfcdddSJulian Elischer 		 * Do the last parts of the setup needed for the upcall.
179144990b8cSJulian Elischer 		 */
1792c76e33b6SJonathan Mini 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1793ed32df81SJulian Elischer 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1794c76e33b6SJonathan Mini 
17951d5a24beSDavid Xu 		td->td_pflags &= ~TDP_UPCALLING;
1796cd4f6ebbSDavid Xu 		if (ku->ku_flags & KUF_DOUPCALL) {
17975215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
17985215b187SJeff Roberson 			ku->ku_flags &= ~KUF_DOUPCALL;
17995215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
1800cd4f6ebbSDavid Xu 		}
1801c76e33b6SJonathan Mini 		/*
18021ecb38a3SDavid Xu 		 * Set user context to the UTS
18031ecb38a3SDavid Xu 		 */
18041ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
18051ecb38a3SDavid Xu 			cpu_set_upcall_kse(td, ku);
18061ecb38a3SDavid Xu 			error = suword(&ku->ku_mailbox->km_curthread, 0);
18071ecb38a3SDavid Xu 			if (error)
18081ecb38a3SDavid Xu 				goto out;
18091ecb38a3SDavid Xu 		}
18101ecb38a3SDavid Xu 
18111ecb38a3SDavid Xu 		/*
181293a7aa79SJulian Elischer 		 * Unhook the list of completed threads.
181393a7aa79SJulian Elischer 		 * Anything that completes after this gets to
181493a7aa79SJulian Elischer 		 * come in next time.
181593a7aa79SJulian Elischer 		 * Put the list of completed thread mailboxes on
181693a7aa79SJulian Elischer 		 * this KSE's mailbox.
1817c76e33b6SJonathan Mini 		 */
18181ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
18191ecb38a3SDavid Xu 		    (error = thread_link_mboxes(kg, ku)) != 0)
18200252d203SDavid Xu 			goto out;
18211ecb38a3SDavid Xu 	}
18221ecb38a3SDavid Xu 	if (!uts_crit) {
1823bfd83250SDavid Xu 		nanotime(&ts);
18241ecb38a3SDavid Xu 		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1825bfd83250SDavid Xu 	}
18260252d203SDavid Xu 
18270252d203SDavid Xu out:
18280252d203SDavid Xu 	if (error) {
18293d0586d4SJulian Elischer 		/*
1830fc8cdd87SDavid Xu 		 * Things are going to be so screwed we should just kill
1831fc8cdd87SDavid Xu 		 * the process.
18323d0586d4SJulian Elischer 		 * How do we do that?
18333d0586d4SJulian Elischer 		 */
183448bfcdddSJulian Elischer 		PROC_LOCK(td->td_proc);
183548bfcdddSJulian Elischer 		psignal(td->td_proc, SIGSEGV);
183648bfcdddSJulian Elischer 		PROC_UNLOCK(td->td_proc);
18370252d203SDavid Xu 	} else {
18380252d203SDavid Xu 		/*
18390252d203SDavid Xu 		 * Optimisation:
18400252d203SDavid Xu 		 * Ensure that we have a spare thread available,
18410252d203SDavid Xu 		 * for when we re-enter the kernel.
18420252d203SDavid Xu 		 */
18430252d203SDavid Xu 		if (td->td_standin == NULL)
18440252d203SDavid Xu 			thread_alloc_spare(td, NULL);
18450252d203SDavid Xu 	}
18460252d203SDavid Xu 
18471ecb38a3SDavid Xu 	ku->ku_mflags = 0;
18480252d203SDavid Xu 	/*
18490252d203SDavid Xu 	 * Clear the thread mailbox first, then clear the system tick count.
18500252d203SDavid Xu 	 * The order is important because thread_statclock() uses the
18510252d203SDavid Xu 	 * mailbox pointer to see whether it is a userland thread or
18520252d203SDavid Xu 	 * a UTS kernel thread.
18530252d203SDavid Xu 	 */
185493a7aa79SJulian Elischer 	td->td_mailbox = NULL;
18555215b187SJeff Roberson 	td->td_usticks = 0;
185648bfcdddSJulian Elischer 	return (error);	/* go sync */
185744990b8cSJulian Elischer }
185844990b8cSJulian Elischer 
185944990b8cSJulian Elischer /*
186044990b8cSJulian Elischer  * Enforce single-threading.
186144990b8cSJulian Elischer  *
186244990b8cSJulian Elischer  * Returns 1 if the caller must abort (another thread is waiting to
186344990b8cSJulian Elischer  * exit the process or similar). Process is locked!
186444990b8cSJulian Elischer  * Returns 0 when you are successfully the only thread running.
186544990b8cSJulian Elischer  * A process has successfully single threaded in the suspend mode when
186644990b8cSJulian Elischer  * there are no threads in user mode. Threads in the kernel must be
186744990b8cSJulian Elischer  * allowed to continue until they get to the user boundary. They may even
186844990b8cSJulian Elischer  * copy out their return values and data before suspending. They may,
186944990b8cSJulian Elischer  * however, be accelerated in reaching the user boundary, as we will wake
187044990b8cSJulian Elischer  * up any sleeping threads that are interruptible (PCATCH).
187144990b8cSJulian Elischer  */
187244990b8cSJulian Elischer int
187344990b8cSJulian Elischer thread_single(int force_exit)
187444990b8cSJulian Elischer {
187544990b8cSJulian Elischer 	struct thread *td;
187644990b8cSJulian Elischer 	struct thread *td2;
187744990b8cSJulian Elischer 	struct proc *p;
187844990b8cSJulian Elischer 
187944990b8cSJulian Elischer 	td = curthread;
188044990b8cSJulian Elischer 	p = td->td_proc;
188137814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
188244990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
188344990b8cSJulian Elischer 	KASSERT((td != NULL), ("curthread is NULL"));
188444990b8cSJulian Elischer 
18850e2a4d3aSDavid Xu 	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
188644990b8cSJulian Elischer 		return (0);
188744990b8cSJulian Elischer 
1888e3b9bf71SJulian Elischer 	/* Is someone already single threading? */
1889e3b9bf71SJulian Elischer 	if (p->p_singlethread)
189044990b8cSJulian Elischer 		return (1);
189144990b8cSJulian Elischer 
189293a7aa79SJulian Elischer 	if (force_exit == SINGLE_EXIT) {
189344990b8cSJulian Elischer 		p->p_flag |= P_SINGLE_EXIT;
189493a7aa79SJulian Elischer 	} else
189544990b8cSJulian Elischer 		p->p_flag &= ~P_SINGLE_EXIT;
18961279572aSDavid Xu 	p->p_flag |= P_STOPPED_SINGLE;
189771fad9fdSJulian Elischer 	mtx_lock_spin(&sched_lock);
1898112afcb2SJohn Baldwin 	p->p_singlethread = td;
1899112afcb2SJohn Baldwin 	while ((p->p_numthreads - p->p_suspcount) != 1) {
190044990b8cSJulian Elischer 		FOREACH_THREAD_IN_PROC(p, td2) {
190144990b8cSJulian Elischer 			if (td2 == td)
190244990b8cSJulian Elischer 				continue;
1903588257e8SDavid Xu 			td2->td_flags |= TDF_ASTPENDING;
190471fad9fdSJulian Elischer 			if (TD_IS_INHIBITED(td2)) {
19051279572aSDavid Xu 				if (force_exit == SINGLE_EXIT) {
19069d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2)) {
190771fad9fdSJulian Elischer 						thread_unsuspend_one(td2);
190871fad9fdSJulian Elischer 					}
190933862f40SDavid Xu 					if (TD_ON_SLEEPQ(td2) &&
191033862f40SDavid Xu 					    (td2->td_flags & TDF_SINTR)) {
191144f3b092SJohn Baldwin 						sleepq_abort(td2);
191271fad9fdSJulian Elischer 					}
19139d102777SJulian Elischer 				} else {
19149d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2))
19159d102777SJulian Elischer 						continue;
19165215b187SJeff Roberson 					/*
19175215b187SJeff Roberson 					 * Maybe other inhibited states too?
19185215b187SJeff Roberson 					 * XXXKSE Is it totally safe to
19195215b187SJeff Roberson 					 * suspend a non-interruptible thread?
19205215b187SJeff Roberson 					 */
192193a7aa79SJulian Elischer 					if (td2->td_inhibitors &
19225215b187SJeff Roberson 					    (TDI_SLEEPING | TDI_SWAPPED))
19239d102777SJulian Elischer 						thread_suspend_one(td2);
192444990b8cSJulian Elischer 				}
192544990b8cSJulian Elischer 			}
19269d102777SJulian Elischer 		}
19279d102777SJulian Elischer 		/*
19289d102777SJulian Elischer 		 * Maybe we suspended some threads.. was it enough?
19299d102777SJulian Elischer 		 */
1930112afcb2SJohn Baldwin 		if ((p->p_numthreads - p->p_suspcount) == 1)
19319d102777SJulian Elischer 			break;
19329d102777SJulian Elischer 
193344990b8cSJulian Elischer 		/*
193444990b8cSJulian Elischer 		 * Wake us up when everyone else has suspended.
1935e3b9bf71SJulian Elischer 		 * In the meantime we suspend as well.
193644990b8cSJulian Elischer 		 */
193771fad9fdSJulian Elischer 		thread_suspend_one(td);
193844990b8cSJulian Elischer 		PROC_UNLOCK(p);
193929bcc451SJeff Roberson 		mi_switch(SW_VOL);
194044990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
194144990b8cSJulian Elischer 		PROC_LOCK(p);
1942112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
194344990b8cSJulian Elischer 	}
19445215b187SJeff Roberson 	if (force_exit == SINGLE_EXIT) {
1945112afcb2SJohn Baldwin 		if (td->td_upcall)
19465215b187SJeff Roberson 			upcall_remove(td);
19475c8329edSJulian Elischer 		kse_purge(p, td);
19485215b187SJeff Roberson 	}
1949112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
195044990b8cSJulian Elischer 	return (0);
195144990b8cSJulian Elischer }
195244990b8cSJulian Elischer 
195344990b8cSJulian Elischer /*
195444990b8cSJulian Elischer  * Called in from locations that can safely check to see
195544990b8cSJulian Elischer  * whether we have to suspend or at least throttle for a
195644990b8cSJulian Elischer  * single-thread event (e.g. fork).
195744990b8cSJulian Elischer  *
195844990b8cSJulian Elischer  * Such locations include userret().
195944990b8cSJulian Elischer  * If the "return_instead" argument is non-zero, the thread must be able to
196044990b8cSJulian Elischer  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
196144990b8cSJulian Elischer  *
196244990b8cSJulian Elischer  * The 'return_instead' argument tells the function if it may do a
196344990b8cSJulian Elischer  * thread_exit() or suspend, or whether the caller must abort and back
196444990b8cSJulian Elischer  * out instead.
196544990b8cSJulian Elischer  *
196644990b8cSJulian Elischer  * If the thread that set the single_threading request has set the
196744990b8cSJulian Elischer  * P_SINGLE_EXIT bit in the process flags then this call will never return
196844990b8cSJulian Elischer  * if 'return_instead' is false, but will exit.
196944990b8cSJulian Elischer  *
197044990b8cSJulian Elischer  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
197144990b8cSJulian Elischer  *---------------+--------------------+---------------------
197244990b8cSJulian Elischer  *       0       | returns 0          |   returns 0 or 1
197344990b8cSJulian Elischer  *               | when ST ends       |   immediately
197444990b8cSJulian Elischer  *---------------+--------------------+---------------------
197544990b8cSJulian Elischer  *       1       | thread exits       |   returns 1
197644990b8cSJulian Elischer  *               |                    |  immediately
197744990b8cSJulian Elischer  * 0 = thread_exit() or suspension ok,
197844990b8cSJulian Elischer  * other = return error instead of stopping the thread.
197944990b8cSJulian Elischer  *
198044990b8cSJulian Elischer  * While a full suspension is under effect, even a single threading
198144990b8cSJulian Elischer  * thread would be suspended if it made this call (but it shouldn't).
198244990b8cSJulian Elischer  * This call should only be made from places where
198344990b8cSJulian Elischer  * thread_exit() would be safe as that may be the outcome unless
198444990b8cSJulian Elischer  * return_instead is set.
198544990b8cSJulian Elischer  */
198644990b8cSJulian Elischer int
198744990b8cSJulian Elischer thread_suspend_check(int return_instead)
198844990b8cSJulian Elischer {
1989ecafb24bSJuli Mallett 	struct thread *td;
1990ecafb24bSJuli Mallett 	struct proc *p;
199144990b8cSJulian Elischer 
199244990b8cSJulian Elischer 	td = curthread;
199344990b8cSJulian Elischer 	p = td->td_proc;
199437814395SPeter Wemm 	mtx_assert(&Giant, MA_NOTOWNED);
199544990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
199644990b8cSJulian Elischer 	while (P_SHOULDSTOP(p)) {
19971279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
199844990b8cSJulian Elischer 			KASSERT(p->p_singlethread != NULL,
199944990b8cSJulian Elischer 			    ("singlethread not set"));
200044990b8cSJulian Elischer 			/*
2001e3b9bf71SJulian Elischer 			 * The only suspension in action is a
2002e3b9bf71SJulian Elischer 			 * single-threading. The single threader need not stop.
2003b6d5995eSJulian Elischer 			 * XXX Should be safe to access unlocked
2004b6d5995eSJulian Elischer 			 * as it can only be set to be true by us.
200544990b8cSJulian Elischer 			 */
2006e3b9bf71SJulian Elischer 			if (p->p_singlethread == td)
200744990b8cSJulian Elischer 				return (0);	/* Exempt from stopping. */
200844990b8cSJulian Elischer 		}
2009e3b9bf71SJulian Elischer 		if (return_instead)
201044990b8cSJulian Elischer 			return (1);
201144990b8cSJulian Elischer 
2012e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
2013e574e444SDavid Xu 		thread_stopped(p);
201444990b8cSJulian Elischer 		/*
201544990b8cSJulian Elischer 		 * If the process is waiting for us to exit,
201644990b8cSJulian Elischer 		 * this thread should just suicide.
20171279572aSDavid Xu 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
201844990b8cSJulian Elischer 		 */
201944990b8cSJulian Elischer 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
20200e2a4d3aSDavid Xu 			if (p->p_flag & P_SA)
202144990b8cSJulian Elischer 				thread_exit();
20222c10d16aSJeff Roberson 			else
20232c10d16aSJeff Roberson 				thr_exit1();
202444990b8cSJulian Elischer 		}
202544990b8cSJulian Elischer 
202644990b8cSJulian Elischer 		/*
202744990b8cSJulian Elischer 		 * When a thread suspends, it just
202844990b8cSJulian Elischer 		 * moves to the process's suspend queue
202944990b8cSJulian Elischer 		 * and stays there.
203044990b8cSJulian Elischer 		 */
203171fad9fdSJulian Elischer 		thread_suspend_one(td);
20321279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2033cf19bf91SJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
203471fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
2035cf19bf91SJulian Elischer 			}
2036cf19bf91SJulian Elischer 		}
2037a6f37ac9SJohn Baldwin 		PROC_UNLOCK(p);
203829bcc451SJeff Roberson 		mi_switch(SW_INVOL);
203944990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
204044990b8cSJulian Elischer 		PROC_LOCK(p);
204144990b8cSJulian Elischer 	}
204244990b8cSJulian Elischer 	return (0);
204344990b8cSJulian Elischer }
204444990b8cSJulian Elischer 
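/*
 * Hypothetical caller sketches for the two modes of
 * thread_suspend_check() documented above: userret() stops here
 * unconditionally, while code that must not block passes a non-zero
 * return_instead and backs out (the errno choice is illustrative):
 */
#if 0
	PROC_LOCK(p);
	if (thread_suspend_check(1)) {		/* may not stop here; abort */
		PROC_UNLOCK(p);
		return (ERESTART);
	}
	thread_suspend_check(0);		/* safe point: stop or exit */
	PROC_UNLOCK(p);
#endif
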
204535c32a76SDavid Xu void
204635c32a76SDavid Xu thread_suspend_one(struct thread *td)
204735c32a76SDavid Xu {
204835c32a76SDavid Xu 	struct proc *p = td->td_proc;
204935c32a76SDavid Xu 
205035c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2051112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
2052e574e444SDavid Xu 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
205335c32a76SDavid Xu 	p->p_suspcount++;
205471fad9fdSJulian Elischer 	TD_SET_SUSPENDED(td);
205535c32a76SDavid Xu 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
205671fad9fdSJulian Elischer 	/*
205771fad9fdSJulian Elischer 	 * Hack: If we are suspending but are on the sleep queue
205871fad9fdSJulian Elischer 	 * then we are in msleep or the cv equivalent. We
205971fad9fdSJulian Elischer 	 * want to look like we have two inhibitors.
20609d102777SJulian Elischer 	 * It may already be set; that doesn't matter.
206171fad9fdSJulian Elischer 	 */
206271fad9fdSJulian Elischer 	if (TD_ON_SLEEPQ(td))
206371fad9fdSJulian Elischer 		TD_SET_SLEEPING(td);
206435c32a76SDavid Xu }
206535c32a76SDavid Xu 
206635c32a76SDavid Xu void
206735c32a76SDavid Xu thread_unsuspend_one(struct thread *td)
206835c32a76SDavid Xu {
206935c32a76SDavid Xu 	struct proc *p = td->td_proc;
207035c32a76SDavid Xu 
207135c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2072112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
207335c32a76SDavid Xu 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
207471fad9fdSJulian Elischer 	TD_CLR_SUSPENDED(td);
207535c32a76SDavid Xu 	p->p_suspcount--;
207671fad9fdSJulian Elischer 	setrunnable(td);
207735c32a76SDavid Xu }
207835c32a76SDavid Xu 
207944990b8cSJulian Elischer /*
208044990b8cSJulian Elischer  * Allow all threads blocked by single threading to continue running.
208144990b8cSJulian Elischer  */
208244990b8cSJulian Elischer void
208344990b8cSJulian Elischer thread_unsuspend(struct proc *p)
208444990b8cSJulian Elischer {
208544990b8cSJulian Elischer 	struct thread *td;
208644990b8cSJulian Elischer 
2087b6d5995eSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
208844990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
208944990b8cSJulian Elischer 	if (!P_SHOULDSTOP(p)) {
209044990b8cSJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
209135c32a76SDavid Xu 			thread_unsuspend_one(td);
209244990b8cSJulian Elischer 		}
20931279572aSDavid Xu 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
209444990b8cSJulian Elischer 	    (p->p_numthreads == p->p_suspcount)) {
209544990b8cSJulian Elischer 		/*
209644990b8cSJulian Elischer 		 * Stopping everything also did the job for the single
209744990b8cSJulian Elischer 		 * threading request. Now we've downgraded to single-threaded,
209844990b8cSJulian Elischer 		 * let it continue.
209944990b8cSJulian Elischer 		 */
210035c32a76SDavid Xu 		thread_unsuspend_one(p->p_singlethread);
210144990b8cSJulian Elischer 	}
210244990b8cSJulian Elischer }
210344990b8cSJulian Elischer 
210444990b8cSJulian Elischer void
210544990b8cSJulian Elischer thread_single_end(void)
210644990b8cSJulian Elischer {
210744990b8cSJulian Elischer 	struct thread *td;
210844990b8cSJulian Elischer 	struct proc *p;
210944990b8cSJulian Elischer 
211044990b8cSJulian Elischer 	td = curthread;
211144990b8cSJulian Elischer 	p = td->td_proc;
211244990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
21131279572aSDavid Xu 	p->p_flag &= ~P_STOPPED_SINGLE;
2114112afcb2SJohn Baldwin 	mtx_lock_spin(&sched_lock);
211544990b8cSJulian Elischer 	p->p_singlethread = NULL;
211649539972SJulian Elischer 	/*
211749539972SJulian Elischer 	 * If there are other threads they may now run,
211849539972SJulian Elischer 	 * unless of course there is a blanket 'stop order'
211949539972SJulian Elischer 	 * on the process. The single threader must be allowed
212049539972SJulian Elischer 	 * to continue, however, as this is a bad place to stop.
212149539972SJulian Elischer 	 */
212249539972SJulian Elischer 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
212349539972SJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
212471fad9fdSJulian Elischer 			thread_unsuspend_one(td);
212544990b8cSJulian Elischer 		}
212649539972SJulian Elischer 	}
2127112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
212849539972SJulian Elischer }
2129