xref: /freebsd/sys/kern/kern_thread.c (revision 139b7550d98ddfe74a9e69e9d7bc0645f4acafea)
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 150;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 50;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
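
/*
 * Usage note (annotation): RANGEOF(struct ksegrp, kg_startzero, kg_endzero)
 * yields the number of bytes between the two marker members, so that
 *
 *	bzero(&kg->kg_startzero,
 *	    RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 *
 * clears every member laid out between them (as done in kse_create() below).
 * This assumes 'start' is declared before 'end' in the structure.
 */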

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
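
/*
 * Annotation: kse_zombie_lock is a spin mutex (MTX_SPIN), presumably
 * because the *_stash() routines below can be reached from contexts that
 * already hold the spin lock sched_lock (e.g. late in thread_exit()),
 * where acquiring a regular sleep mutex would violate lock ordering.
 */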

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}
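
/*
 * Annotation: the handler above reports the effective value (mp_ncpus, or
 * 1 on UP kernels, while the knob is 0) and accepts any non-negative
 * override.  From userland the knob appears as, e.g.:
 *
 *	$ sysctl kern.threads.virtual_cpu	(read)
 *	# sysctl kern.threads.virtual_cpu=4	(write, needs privilege)
 */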

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu	= NOCPU;
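	/*
	 * Note (annotation): td_critnest starts at 1, not 0, apparently so
	 * that a newly constructed thread behaves as if in a critical
	 * section until it has been fully switched in for the first time.
	 */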
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	vm_thread_new(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}
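
/*
 * Annotation: in the three init functions above, the scheduler-private
 * data is carved out of the same allocation as the object itself.  The
 * zones are created in threadinit() with sched_sizeof_thread(),
 * sched_sizeof_kse() and sched_sizeof_ksegrp(), which reserve room for
 * the per-scheduler struct directly after the public one; hence the
 * "(struct td_sched *)&td[1]" idiom.
 */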

/*
 * Link a KSE into its kse group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	if (--kg->kg_kses == 0)
		ksegrp_unlink(kg);
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = 0;
	}
}

/*
 * For a newly created process,
 * link up all the structures and its initial thread, etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox * tmbx;
	int cmd;
	long data;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	if (!(p->p_flag & P_SA))
		return (EINVAL);

	switch (uap->cmd) {
	case KSE_INTR_SENDSIG:
		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		/* FALLTHROUGH */
	case KSE_INTR_INTERRUPT:
	case KSE_INTR_RESTART:
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2->td_mailbox == uap->tmbx)
				break;
		}
		if (td2 == NULL) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (ESRCH);
		}
		if (uap->cmd == KSE_INTR_SENDSIG) {
			if (uap->data > 0) {
				td2->td_flags &= ~TDF_INTERRUPT;
				mtx_unlock_spin(&sched_lock);
				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
			} else {
				mtx_unlock_spin(&sched_lock);
			}
		} else {
			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
			if (TD_CAN_UNBIND(td2))
				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
			if (uap->cmd == KSE_INTR_INTERRUPT)
				td2->td_intrval = EINTR;
			else
				td2->td_intrval = ERESTART;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
		}
		PROC_UNLOCK(p);
		break;
	case KSE_INTR_SIGEXIT:
		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		PROC_LOCK(p);
		sigexit(td, (int)uap->data);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}
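
/*
 * Illustrative only (not part of the source): a userland threads library
 * would typically use this syscall as, e.g.,
 *
 *	kse_thr_interrupt(tmbx, KSE_INTR_INTERRUPT, 0);
 *
 * to make the target thread's interruptible sleep return EINTR, or with
 * KSE_INTR_SENDSIG and a signal number in 'data' to direct a signal at
 * one specific thread ('tmbx' being that thread's mailbox pointer).
 */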

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int    error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_SA;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct timespec timeout;
	struct timeval tv;
	sigset_t sigset;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
	if (td->td_flags & TDF_SA)
		td->td_pflags |= TDP_UPCALLING;
	else {
		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
		if (ku->ku_mflags == -1) {
			PROC_LOCK(p);
			sigexit(td, SIGSEGV);
		}
	}
	PROC_LOCK(p);
	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
		/* UTS wants to wait for signal event */
		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
		p->p_flag &= ~P_SIGEVENT;
		sigset = p->p_siglist;
		PROC_UNLOCK(p);
		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
		    sizeof(sigset));
	} else {
		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
			kg->kg_upsleeps++;
			error = msleep(&kg->kg_completed, &p->p_mtx,
				PPAUSE|PCATCH, "kserel",
				(uap->timeout ? tvtohz(&tv) : 0));
			kg->kg_upsleeps--;
		}
		PROC_UNLOCK(p);
	}
	if (ku->ku_flags & KUF_DOUPCALL) {
		mtx_lock_spin(&sched_lock);
		ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}
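
/*
 * Annotation, summarizing the two wait modes above: with KMF_WAITSIGEVENT
 * the UTS sleeps on p->p_siglist and receives the process's pending signal
 * set through km_sigscaught; otherwise it sleeps on kg->kg_completed until
 * a completed thread mailbox is queued or KUF_DOUPCALL is set, with the
 * optional timespec converted to ticks by tvtohz().
 */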

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_SA))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) &&
		           ((td2->td_wchan == &kg->kg_completed) ||
			    (td2->td_wchan == &p->p_siglist &&
			     (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
			abortsleep(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall.
 * In all other situations, allocate max new KSEs and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus, sa = 0, first = 0;
	struct thread *newtd;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't always have a cpu counter. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (virtual_cpu != 0)
		ncpus = virtual_cpu;
	if (!(mbx.km_flags & KMF_BOUND))
		sa = TDF_SA;
	else
		ncpus = 1;
	PROC_LOCK(p);
	if (!(p->p_flag & P_SA)) {
		first = 1;
		p->p_flag |= P_SA;
	}
	PROC_UNLOCK(p);
	if (!sa && !uap->newgroup && !first)
		return (EINVAL);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race condition here, but it is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		mtx_unlock_spin(&sched_lock);
	} else {
		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
			return (EINVAL);
		newkg = kg;
	}

	/*
	 * Creating more upcalls than the number of physical cpus does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group.
		 *
		 * For a multiplexed group, create as many KSEs as there are
		 * physical cpus. This increases concurrency even if the
		 * userland is not MP safe and can only run on a single CPU.
		 * In an ideal world, every physical cpu should execute a
		 * thread.  If there are enough KSEs, threads in the kernel
		 * can be executed in parallel on different cpus at full
		 * speed; concurrency in the kernel shouldn't be restricted
		 * by the number of upcalls userland provides. Adding more
		 * upcall structures only increases concurrency in userland.
		 *
		 * For a bound thread group, because there is only one thread
		 * in the group, we only create one KSE for the group. A
		 * thread in this kind of group will never schedule an
		 * upcall when blocked; this is intended to simulate
		 * pthread system scope threads.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			      ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	PROC_LOCK(p);
	if (newkg->kg_numupcalls >= ncpus) {
		PROC_UNLOCK(p);
		upcall_free(newku);
		return (EPROCLIM);
	}
	if (first && sa) {
		SIGSETOR(p->p_siglist, td->td_siglist);
		SIGEMPTYSET(td->td_siglist);
		SIGFILLSET(td->td_sigmask);
		SIG_CANTMASK(td->td_sigmask);
	}
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	upcall_link(newku, newkg);
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread; find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread,
		 * create an initial upcall thread to own it.
		 */
		newtd = thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread doesn't have an upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
			newtd = td;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			newtd = thread_schedule_upcall(td, newku);
		}
	}
	if (!sa) {
		newtd->td_mailbox = mbx.km_curthread;
		newtd->td_flags &= ~TDF_SA;
		if (newtd != td) {
			mtx_unlock_spin(&sched_lock);
			cpu_set_upcall_kse(newtd, newku);
			mtx_lock_spin(&sched_lock);
		}
	} else {
		newtd->td_flags |= TDF_SA;
	}
	if (newtd != td)
		setrunqueue(newtd);
	mtx_unlock_spin(&sched_lock);
	return (0);
}
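
/*
 * Illustrative only (not part of the source): userland enters KSE mode
 * by filling in a struct kse_mailbox and issuing this syscall, roughly:
 *
 *	struct kse_mailbox mbx;
 *
 *	bzero(&mbx, sizeof(mbx));
 *	mbx.km_func = uts_entry;		(upcall entry point)
 *	mbx.km_stack.ss_sp = uts_stack;		(stack for the UTS)
 *	mbx.km_stack.ss_size = UTS_STACK_SIZE;
 *	kse_create(&mbx, 0);			(0 = stay in this ksegrp)
 *
 * where uts_entry, uts_stack and UTS_STACK_SIZE are hypothetical names
 * for the library's upcall handler and its stack.
 */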

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}
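
/*
 * Annotation on the UMA callbacks above: ctor/dtor run on every
 * uma_zalloc()/uma_zfree(), while init/fini run only when an item enters
 * or leaves the zone's cache.  Thus thread_init()/thread_fini() set up
 * the "type-stable" parts (such as the kernel stack) once per cached
 * item rather than once per allocation.
 */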

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */

void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td, int willexit)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error = 0, temp, sig;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Export clock ticks spent in kernel mode */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword32(addr) + td->td_usticks;
	if (suword32(addr, temp)) {
		error = EFAULT;
		goto bad;
	}

	/*
	 * Post sync signal, or process SIGKILL and SIGSTOP.
	 * For a sync signal, this is only possible when the signal is not
	 * caught by userland or the process is being debugged.
	 */
	PROC_LOCK(p);
	if (td->td_flags & TDF_NEEDSIGCHK) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_NEEDSIGCHK;
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
	}
	if (willexit)
		SIGFILLSET(td->td_sigmask);
	PROC_UNLOCK(p);

	/* Get address in latest mbox of list pointer */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads when we unlock the
			 * process lock. It's no longer valid to
			 * use it again in any other place.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	sigexit(td, SIGILL);
	return (error);
}
10943d0586d4SJulian Elischer /*
10953d0586d4SJulian Elischer  * Take the list of completed mailboxes for this KSEGRP and put them on this
10965215b187SJeff Roberson  * upcall's mailbox as it's the next one going up.
10973d0586d4SJulian Elischer  */
10983d0586d4SJulian Elischer static int
10995215b187SJeff Roberson thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
11003d0586d4SJulian Elischer {
11010cd3964fSJulian Elischer 	struct proc *p = kg->kg_proc;
11023d0586d4SJulian Elischer 	void *addr;
11033d0586d4SJulian Elischer 	uintptr_t mbx;
11043d0586d4SJulian Elischer 
11055215b187SJeff Roberson 	addr = (void *)(&ku->ku_mailbox->km_completed);
11063d0586d4SJulian Elischer 	for (;;) {
11073d0586d4SJulian Elischer 		mbx = (uintptr_t)kg->kg_completed;
11083d0586d4SJulian Elischer 		if (suword(addr, mbx)) {
11090cd3964fSJulian Elischer 			PROC_LOCK(p);
11100cd3964fSJulian Elischer 			psignal(p, SIGSEGV);
11110cd3964fSJulian Elischer 			PROC_UNLOCK(p);
11123d0586d4SJulian Elischer 			return (EFAULT);
11133d0586d4SJulian Elischer 		}
11140cd3964fSJulian Elischer 		PROC_LOCK(p);
11153d0586d4SJulian Elischer 		if (mbx == (uintptr_t)kg->kg_completed) {
11163d0586d4SJulian Elischer 			kg->kg_completed = NULL;
11170cd3964fSJulian Elischer 			PROC_UNLOCK(p);
11183d0586d4SJulian Elischer 			break;
11193d0586d4SJulian Elischer 		}
11200cd3964fSJulian Elischer 		PROC_UNLOCK(p);
11213d0586d4SJulian Elischer 	}
11223d0586d4SJulian Elischer 	return (0);
11233d0586d4SJulian Elischer }
112444990b8cSJulian Elischer 
112544990b8cSJulian Elischer /*
11268798d4f9SDavid Xu  * This function should be called at statclock interrupt time
11278798d4f9SDavid Xu  */
11288798d4f9SDavid Xu int
11295215b187SJeff Roberson thread_statclock(int user)
11308798d4f9SDavid Xu {
11318798d4f9SDavid Xu 	struct thread *td = curthread;
1132cd4f6ebbSDavid Xu 	struct ksegrp *kg = td->td_ksegrp;
11338798d4f9SDavid Xu 
1134cd4f6ebbSDavid Xu 	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
1135cd4f6ebbSDavid Xu 		return (0);
11368798d4f9SDavid Xu 	if (user) {
11378798d4f9SDavid Xu 		/* Current always do via ast() */
1138b4508d7dSDavid Xu 		mtx_lock_spin(&sched_lock);
11394a338afdSJulian Elischer 		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
1140b4508d7dSDavid Xu 		mtx_unlock_spin(&sched_lock);
11415215b187SJeff Roberson 		td->td_uuticks++;
11428798d4f9SDavid Xu 	} else {
11438798d4f9SDavid Xu 		if (td->td_mailbox != NULL)
11445215b187SJeff Roberson 			td->td_usticks++;
11455215b187SJeff Roberson 		else {
11465215b187SJeff Roberson 			/* XXXKSE
11475215b187SJeff Roberson 		 	 * We will call thread_user_enter() for every
11485215b187SJeff Roberson 			 * kernel entry in future, so if the thread mailbox
11495215b187SJeff Roberson 			 * is NULL, it must be a UTS kernel, don't account
11505215b187SJeff Roberson 			 * clock ticks for it.
11515215b187SJeff Roberson 			 */
11528798d4f9SDavid Xu 		}
11535215b187SJeff Roberson 	}
11545215b187SJeff Roberson 	return (0);
11558798d4f9SDavid Xu }
11568798d4f9SDavid Xu 
11575215b187SJeff Roberson /*
11584b4866edSDavid Xu  * Export statclock ticks to userland.
11595215b187SJeff Roberson  */
11608798d4f9SDavid Xu static int
11614b4866edSDavid Xu thread_update_usr_ticks(struct thread *td, int user)
11628798d4f9SDavid Xu {
11638798d4f9SDavid Xu 	struct proc *p = td->td_proc;
11648798d4f9SDavid Xu 	struct kse_thr_mailbox *tmbx;
11655215b187SJeff Roberson 	struct kse_upcall *ku;
11666ce75196SDavid Xu 	struct ksegrp *kg;
11678798d4f9SDavid Xu 	caddr_t addr;
11685215b187SJeff Roberson 	uint uticks;
11698798d4f9SDavid Xu 
11705215b187SJeff Roberson 	if ((ku = td->td_upcall) == NULL)
11715215b187SJeff Roberson 		return (-1);
11728798d4f9SDavid Xu 
11735215b187SJeff Roberson 	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
11748798d4f9SDavid Xu 	if ((tmbx == NULL) || (tmbx == (void *)-1))
11755215b187SJeff Roberson 		return (-1);
11764b4866edSDavid Xu 	if (user) {
11775215b187SJeff Roberson 		uticks = td->td_uuticks;
11785215b187SJeff Roberson 		td->td_uuticks = 0;
11795215b187SJeff Roberson 		addr = (caddr_t)&tmbx->tm_uticks;
11804b4866edSDavid Xu 	} else {
11814b4866edSDavid Xu 		uticks = td->td_usticks;
11825215b187SJeff Roberson 		td->td_usticks = 0;
11834b4866edSDavid Xu 		addr = (caddr_t)&tmbx->tm_sticks;
11844b4866edSDavid Xu 	}
11854b4866edSDavid Xu 	if (uticks) {
11864e4422d4SMarcel Moolenaar 		if (suword32(addr, uticks+fuword32(addr))) {
11875215b187SJeff Roberson 			PROC_LOCK(p);
11885215b187SJeff Roberson 			psignal(p, SIGSEGV);
11895215b187SJeff Roberson 			PROC_UNLOCK(p);
11905215b187SJeff Roberson 			return (-2);
11915215b187SJeff Roberson 		}
11924b4866edSDavid Xu 	}
11936ce75196SDavid Xu 	kg = td->td_ksegrp;
11946ce75196SDavid Xu 	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
11954b4866edSDavid Xu 		mtx_lock_spin(&sched_lock);
11964b4866edSDavid Xu 		td->td_upcall->ku_flags |= KUF_DOUPCALL;
11974b4866edSDavid Xu 		mtx_unlock_spin(&sched_lock);
11984b4866edSDavid Xu 	}
11995215b187SJeff Roberson 	return (0);
12008798d4f9SDavid Xu }
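/*
 * Illustrative sketch (not part of this file): how userland might read
 * the tm_uticks/tm_sticks counters that thread_update_usr_ticks()
 * accumulates with suword32().  The statclock frequency is assumed to
 * be supplied by the caller (e.g. obtained via the kern.clockrate
 * sysctl); the helper name is an invention for the example.
 */
#if 0
#include <sys/kse.h>
#include <stdio.h>

static void
uts_print_cpu_time(const struct kse_thr_mailbox *tm, int stathz)
{
	/* Each tick represents 1/stathz seconds of CPU time. */
	printf("user %.2fs  system %.2fs\n",
	    (double)tm->tm_uticks / stathz,
	    (double)tm->tm_sticks / stathz);
}
#endif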
12018798d4f9SDavid Xu 
12028798d4f9SDavid Xu /*
120344990b8cSJulian Elischer  * Discard the current thread and exit from its context.
120444990b8cSJulian Elischer  *
120544990b8cSJulian Elischer  * Because we can't free a thread while we're operating under its context,
1206696058c3SJulian Elischer  * push the current thread into our CPU's deadthread holder. This means
1207696058c3SJulian Elischer  * we needn't worry about someone else grabbing our context before we
1208696058c3SJulian Elischer  * do a cpu_throw().
120944990b8cSJulian Elischer  */
121044990b8cSJulian Elischer void
121144990b8cSJulian Elischer thread_exit(void)
121244990b8cSJulian Elischer {
121344990b8cSJulian Elischer 	struct thread *td;
121444990b8cSJulian Elischer 	struct kse *ke;
121544990b8cSJulian Elischer 	struct proc *p;
121644990b8cSJulian Elischer 	struct ksegrp	*kg;
121744990b8cSJulian Elischer 
121844990b8cSJulian Elischer 	td = curthread;
121944990b8cSJulian Elischer 	kg = td->td_ksegrp;
122044990b8cSJulian Elischer 	p = td->td_proc;
122144990b8cSJulian Elischer 	ke = td->td_kse;
122244990b8cSJulian Elischer 
122344990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
122488151aa3SJulian Elischer 	KASSERT(p != NULL, ("thread exiting without a process"));
122588151aa3SJulian Elischer 	KASSERT(ke != NULL, ("thread exiting without a kse"));
122688151aa3SJulian Elischer 	KASSERT(kg != NULL, ("thread exiting without a kse group"));
122744990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
122844990b8cSJulian Elischer 	CTR1(KTR_PROC, "thread_exit: thread %p", td);
122944990b8cSJulian Elischer 	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));
123044990b8cSJulian Elischer 
123148bfcdddSJulian Elischer 	if (td->td_standin != NULL) {
123248bfcdddSJulian Elischer 		thread_stash(td->td_standin);
123348bfcdddSJulian Elischer 		td->td_standin = NULL;
123448bfcdddSJulian Elischer 	}
123548bfcdddSJulian Elischer 
123644990b8cSJulian Elischer 	cpu_thread_exit(td);	/* XXXSMP */
123744990b8cSJulian Elischer 
12381faf202eSJulian Elischer 	/*
12391faf202eSJulian Elischer 	 * The last thread is left attached to the process
12401faf202eSJulian Elischer 	 * so that the whole bundle gets recycled. Skip
12411faf202eSJulian Elischer 	 * all this stuff.
12421faf202eSJulian Elischer 	 */
12431faf202eSJulian Elischer 	if (p->p_numthreads > 1) {
1244d3a0bd78SJulian Elischer 		thread_unlink(td);
12450252d203SDavid Xu 		if (p->p_maxthrwaits)
12460252d203SDavid Xu 			wakeup(&p->p_numthreads);
124744990b8cSJulian Elischer 		/*
124844990b8cSJulian Elischer 		 * The test below is NOT true if we are the
12491faf202eSJulian Elischer 		 * sole exiting thread. P_STOPPED_SINGLE is unset
125044990b8cSJulian Elischer 		 * in exit1() after it is the only survivor.
125144990b8cSJulian Elischer 		 */
12521279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
125344990b8cSJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
125471fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
125544990b8cSJulian Elischer 			}
125644990b8cSJulian Elischer 		}
125748bfcdddSJulian Elischer 
12585215b187SJeff Roberson 		/*
12595215b187SJeff Roberson 		 * Because each upcall structure has an owner thread,
12605215b187SJeff Roberson 		 * and an owner thread exits only when the process is
12615215b187SJeff Roberson 		 * exiting, no upcall to userland is needed any longer
12625215b187SJeff Roberson 		 * and it is safe to delete the upcall structure here.
12635215b187SJeff Roberson 		 * Thus when all threads in a group have exited, all
12645215b187SJeff Roberson 		 * upcalls in the group have been freed automatically.
12655215b187SJeff Roberson 		 */
12665215b187SJeff Roberson 		if (td->td_upcall)
12675215b187SJeff Roberson 			upcall_remove(td);
12686f8132a8SJulian Elischer 
12695215b187SJeff Roberson 		ke->ke_state = KES_UNQUEUED;
12705215b187SJeff Roberson 		ke->ke_thread = NULL;
127148bfcdddSJulian Elischer 		/*
127293a7aa79SJulian Elischer 		 * Decide what to do with the KSE attached to this thread.
127348bfcdddSJulian Elischer 		 */
12745215b187SJeff Roberson 		if (ke->ke_flags & KEF_EXIT)
12756f8132a8SJulian Elischer 			kse_unlink(ke);
12765215b187SJeff Roberson 		else
12776f8132a8SJulian Elischer 			kse_reassign(ke);
12786f8132a8SJulian Elischer 		PROC_UNLOCK(p);
12795215b187SJeff Roberson 		td->td_kse	= NULL;
12805c8329edSJulian Elischer 		td->td_state	= TDS_INACTIVE;
128136f7b36fSDavid Xu #if 0
12825c8329edSJulian Elischer 		td->td_proc	= NULL;
128336f7b36fSDavid Xu #endif
12845c8329edSJulian Elischer 		td->td_ksegrp	= NULL;
12855c8329edSJulian Elischer 		td->td_last_kse	= NULL;
1286696058c3SJulian Elischer 		PCPU_SET(deadthread, td);
12871faf202eSJulian Elischer 	} else {
12881faf202eSJulian Elischer 		PROC_UNLOCK(p);
12891faf202eSJulian Elischer 	}
12904093529dSJeff Roberson 	/* XXX Shouldn't cpu_throw() here. */
1291cc66ebe2SPeter Wemm 	mtx_assert(&sched_lock, MA_OWNED);
1292f2c49dd2SMarcel Moolenaar #if !defined(__alpha__) && !defined(__powerpc__)
1293cc66ebe2SPeter Wemm 	cpu_throw(td, choosethread());
1294cc66ebe2SPeter Wemm #else
129544990b8cSJulian Elischer 	cpu_throw();
1296cc66ebe2SPeter Wemm #endif
1297cc66ebe2SPeter Wemm 	panic("I'm a teapot!");
129844990b8cSJulian Elischer 	/* NOTREACHED */
129944990b8cSJulian Elischer }
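/*
 * Illustrative sketch (not part of this file): the deadthread handoff
 * described above, in miniature.  The next thread chosen to run on this
 * CPU stashes the parked thread so thread_reap() can free it; this is a
 * sketch of what the scheduler's thread-selection path is assumed to
 * do, not a quote of that code.
 */
#if 0
	struct thread *dead;

	if ((dead = PCPU_GET(deadthread)) != NULL) {
		PCPU_SET(deadthread, NULL);
		thread_stash(dead);	/* queued for thread_reap() */
	}
#endif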
130044990b8cSJulian Elischer 
130144990b8cSJulian Elischer /*
1302696058c3SJulian Elischer  * Do any thread-specific cleanups that may be needed in wait().
1303696058c3SJulian Elischer  * Called with Giant held; proc and sched locks not held.
1304696058c3SJulian Elischer  */
1305696058c3SJulian Elischer void
1306696058c3SJulian Elischer thread_wait(struct proc *p)
1307696058c3SJulian Elischer {
1308696058c3SJulian Elischer 	struct thread *td;
1309696058c3SJulian Elischer 
1310696058c3SJulian Elischer 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
1311696058c3SJulian Elischer 	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1312696058c3SJulian Elischer 	FOREACH_THREAD_IN_PROC(p, td) {
1313696058c3SJulian Elischer 		if (td->td_standin != NULL) {
1314696058c3SJulian Elischer 			thread_free(td->td_standin);
1315696058c3SJulian Elischer 			td->td_standin = NULL;
1316696058c3SJulian Elischer 		}
1317696058c3SJulian Elischer 		cpu_thread_clean(td);
1318696058c3SJulian Elischer 	}
1319696058c3SJulian Elischer 	thread_reap();	/* check for zombie threads etc. */
1320696058c3SJulian Elischer }
1321696058c3SJulian Elischer 
1322696058c3SJulian Elischer /*
132344990b8cSJulian Elischer  * Link a thread to a process.
13241faf202eSJulian Elischer  * Set up anything that needs to be initialized for it to
13251faf202eSJulian Elischer  * be used by the process.
132644990b8cSJulian Elischer  *
132744990b8cSJulian Elischer  * Note that we do not link to the proc's ucred here.
132844990b8cSJulian Elischer  * The thread is linked as if running but no KSE assigned.
132944990b8cSJulian Elischer  */
133044990b8cSJulian Elischer void
133144990b8cSJulian Elischer thread_link(struct thread *td, struct ksegrp *kg)
133244990b8cSJulian Elischer {
133344990b8cSJulian Elischer 	struct proc *p;
133444990b8cSJulian Elischer 
133544990b8cSJulian Elischer 	p = kg->kg_proc;
133671fad9fdSJulian Elischer 	td->td_state    = TDS_INACTIVE;
133744990b8cSJulian Elischer 	td->td_proc     = p;
133844990b8cSJulian Elischer 	td->td_ksegrp   = kg;
133944990b8cSJulian Elischer 	td->td_last_kse = NULL;
13405215b187SJeff Roberson 	td->td_flags    = 0;
13415215b187SJeff Roberson 	td->td_kse      = NULL;
134244990b8cSJulian Elischer 
13431faf202eSJulian Elischer 	LIST_INIT(&td->td_contested);
13441faf202eSJulian Elischer 	callout_init(&td->td_slpcallout, 1);
134544990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
134644990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
134744990b8cSJulian Elischer 	p->p_numthreads++;
134844990b8cSJulian Elischer 	kg->kg_numthreads++;
134944990b8cSJulian Elischer }
135044990b8cSJulian Elischer 
1351d3a0bd78SJulian Elischer void
1352d3a0bd78SJulian Elischer thread_unlink(struct thread *td)
1353d3a0bd78SJulian Elischer {
1354d3a0bd78SJulian Elischer 	struct proc *p = td->td_proc;
1355d3a0bd78SJulian Elischer 	struct ksegrp *kg = td->td_ksegrp;
1356d3a0bd78SJulian Elischer 
1357112afcb2SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
1358d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1359d3a0bd78SJulian Elischer 	p->p_numthreads--;
1360d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1361d3a0bd78SJulian Elischer 	kg->kg_numthreads--;
1362d3a0bd78SJulian Elischer 	/* could clear a few other things here */
1363d3a0bd78SJulian Elischer }
1364d3a0bd78SJulian Elischer 
13655215b187SJeff Roberson /*
13665215b187SJeff Roberson  * Purge a ksegrp resource. When a ksegrp is preparing to
13675215b187SJeff Roberson  * exit, it calls this function.
13685215b187SJeff Roberson  */
1369a6f37ac9SJohn Baldwin static void
13705215b187SJeff Roberson kse_purge_group(struct thread *td)
13715215b187SJeff Roberson {
13725215b187SJeff Roberson 	struct ksegrp *kg;
13735215b187SJeff Roberson 	struct kse *ke;
13745215b187SJeff Roberson 
13755215b187SJeff Roberson 	kg = td->td_ksegrp;
13765215b187SJeff Roberson  	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
13775215b187SJeff Roberson 	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
13785215b187SJeff Roberson 		KASSERT(ke->ke_state == KES_IDLE,
13795215b187SJeff Roberson 			("%s: wrong idle KSE state", __func__));
13805215b187SJeff Roberson 		kse_unlink(ke);
13815215b187SJeff Roberson 	}
13825215b187SJeff Roberson 	KASSERT((kg->kg_kses == 1),
13835215b187SJeff Roberson 		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
13845215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0),
13855215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall data structures",
13865215b187SJeff Roberson 		__func__, kg->kg_numupcalls));
13875215b187SJeff Roberson }
13885215b187SJeff Roberson 
13895215b187SJeff Roberson /*
13905215b187SJeff Roberson  * Purge a process's KSE resource. When a process is preparing to
13915215b187SJeff Roberson  * exit, it calls kse_purge to release any extra KSE resources in
13925215b187SJeff Roberson  * the process.
13935215b187SJeff Roberson  */
1394a6f37ac9SJohn Baldwin static void
13955c8329edSJulian Elischer kse_purge(struct proc *p, struct thread *td)
13965c8329edSJulian Elischer {
13975c8329edSJulian Elischer 	struct ksegrp *kg;
13985215b187SJeff Roberson 	struct kse *ke;
13995c8329edSJulian Elischer 
14005c8329edSJulian Elischer  	KASSERT(p->p_numthreads == 1, ("bad thread number"));
14015c8329edSJulian Elischer 	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
14025c8329edSJulian Elischer 		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
14035c8329edSJulian Elischer 		p->p_numksegrps--;
14045215b187SJeff Roberson 		/*
14055215b187SJeff Roberson 		 * KSEs have no owners; after all threads in the group
14065215b187SJeff Roberson 		 * have exited, some KSEs may have been left on the
14075215b187SJeff Roberson 		 * idle queue, so garbage collect them now.
14085215b187SJeff Roberson 		 */
14095215b187SJeff Roberson 		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
14105215b187SJeff Roberson 			KASSERT(ke->ke_state == KES_IDLE,
14115215b187SJeff Roberson 			   ("%s: wrong idle KSE state", __func__));
14125215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
14135215b187SJeff Roberson 			kg->kg_idle_kses--;
14145215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
14155215b187SJeff Roberson 			kg->kg_kses--;
14165215b187SJeff Roberson 			kse_stash(ke);
14175215b187SJeff Roberson 		}
14185c8329edSJulian Elischer 		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
14195c8329edSJulian Elischer 		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
14205215b187SJeff Roberson 		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
14215215b187SJeff Roberson 		KASSERT((kg->kg_numupcalls == 0),
14225215b187SJeff Roberson 		        ("%s: ksegrp still has %d upcall data structures",
14235215b187SJeff Roberson 			__func__, kg->kg_numupcalls));
14245215b187SJeff Roberson 
14255215b187SJeff Roberson 		if (kg != td->td_ksegrp)
14265c8329edSJulian Elischer 			ksegrp_stash(kg);
14275c8329edSJulian Elischer 	}
14285c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
14295c8329edSJulian Elischer 	p->p_numksegrps++;
14305c8329edSJulian Elischer }
14315c8329edSJulian Elischer 
14325215b187SJeff Roberson /*
14335215b187SJeff Roberson  * This function is used to initialize a spare thread for an upcall.
14345215b187SJeff Roberson  * It sets up the thread's large data area outside sched_lock so that
14355215b187SJeff Roberson  * thread_schedule_upcall() does not have to.
14365215b187SJeff Roberson  */
14375215b187SJeff Roberson void
14385215b187SJeff Roberson thread_alloc_spare(struct thread *td, struct thread *spare)
14395215b187SJeff Roberson {
14405215b187SJeff Roberson 	if (td->td_standin)
14415215b187SJeff Roberson 		return;
14425215b187SJeff Roberson 	if (spare == NULL)
14435215b187SJeff Roberson 		spare = thread_alloc();
14445215b187SJeff Roberson 	td->td_standin = spare;
14455215b187SJeff Roberson 	bzero(&spare->td_startzero,
14465215b187SJeff Roberson 	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
14475215b187SJeff Roberson 	spare->td_proc = td->td_proc;
14485215b187SJeff Roberson 	spare->td_ucred = crhold(td->td_ucred);
14495215b187SJeff Roberson }
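/*
 * Reference sketch (not part of this file): the RANGEOF() idiom used
 * above, assuming the conventional offsetof-difference definition that
 * appears earlier in this file.  Members declared between the two
 * marker fields are zeroed or copied as a block, so new fields are
 * handled automatically when placed inside the range.
 */
#if 0
#include <stddef.h>

#define RANGEOF(type, start, end) \
	(offsetof(type, end) - offsetof(type, start))

/*
 * bzero(&spare->td_startzero,
 *     RANGEOF(struct thread, td_startzero, td_endzero));
 */
#endif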
14505c8329edSJulian Elischer 
145144990b8cSJulian Elischer /*
1452c76e33b6SJonathan Mini  * Create a thread and schedule it for upcall on the KSE given.
145393a7aa79SJulian Elischer  * Use our thread's standin so that we don't have to allocate one.
145444990b8cSJulian Elischer  */
145544990b8cSJulian Elischer struct thread *
14565215b187SJeff Roberson thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
145744990b8cSJulian Elischer {
145844990b8cSJulian Elischer 	struct thread *td2;
145944990b8cSJulian Elischer 
146044990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
146148bfcdddSJulian Elischer 
146248bfcdddSJulian Elischer 	/*
14635215b187SJeff Roberson 	 * Schedule an upcall thread on the specified kse_upcall;
14645215b187SJeff Roberson 	 * the kse_upcall must be free and td must have a spare
14655215b187SJeff Roberson 	 * thread.
146648bfcdddSJulian Elischer 	 */
14675215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
146848bfcdddSJulian Elischer 	if ((td2 = td->td_standin) != NULL) {
146948bfcdddSJulian Elischer 		td->td_standin = NULL;
147044990b8cSJulian Elischer 	} else {
14715215b187SJeff Roberson 		panic("no reserve thread when scheduling an upcall");
147248bfcdddSJulian Elischer 		return (NULL);
147344990b8cSJulian Elischer 	}
147444990b8cSJulian Elischer 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
147548bfcdddSJulian Elischer 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
14761faf202eSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
14771faf202eSJulian Elischer 	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
14785215b187SJeff Roberson 	thread_link(td2, ku->ku_ksegrp);
147936f7b36fSDavid Xu 	/* inherit blocked thread's context */
148011e0f8e1SMarcel Moolenaar 	cpu_set_upcall(td2, td);
14815215b187SJeff Roberson 	/* Let the new thread become owner of the upcall */
14825215b187SJeff Roberson 	ku->ku_owner   = td2;
14835215b187SJeff Roberson 	td2->td_upcall = ku;
1484cd4f6ebbSDavid Xu 	td2->td_flags  = TDF_SA;
14851d5a24beSDavid Xu 	td2->td_pflags = TDP_UPCALLING;
14865215b187SJeff Roberson 	td2->td_kse    = NULL;
148748bfcdddSJulian Elischer 	td2->td_state  = TDS_CAN_RUN;
148848bfcdddSJulian Elischer 	td2->td_inhibitors = 0;
14899dde3bc9SDavid Xu 	SIGFILLSET(td2->td_sigmask);
14909dde3bc9SDavid Xu 	SIG_CANTMASK(td2->td_sigmask);
149148bfcdddSJulian Elischer 	return (td2);	/* bogus.. should be a void function */
149244990b8cSJulian Elischer }
149344990b8cSJulian Elischer 
14949dde3bc9SDavid Xu /*
14959dde3bc9SDavid Xu  * This is only used when a thread has generated a trap and the
14969dde3bc9SDavid Xu  * process is being debugged.
14979dde3bc9SDavid Xu  */
149858a3c273SJeff Roberson void
149958a3c273SJeff Roberson thread_signal_add(struct thread *td, int sig)
1500c76e33b6SJonathan Mini {
150158a3c273SJeff Roberson 	struct proc *p;
15029dde3bc9SDavid Xu 	siginfo_t siginfo;
15039dde3bc9SDavid Xu 	struct sigacts *ps;
1504c76e33b6SJonathan Mini 	int error;
1505c76e33b6SJonathan Mini 
1506b0bd5f38SDavid Xu 	p = td->td_proc;
1507b0bd5f38SDavid Xu 	PROC_LOCK_ASSERT(p, MA_OWNED);
15089dde3bc9SDavid Xu 	ps = p->p_sigacts;
15099dde3bc9SDavid Xu 	mtx_assert(&ps->ps_mtx, MA_OWNED);
15109dde3bc9SDavid Xu 
15114b7d5d84SDavid Xu 	cpu_thread_siginfo(sig, 0, &siginfo);
15129dde3bc9SDavid Xu 	mtx_unlock(&ps->ps_mtx);
1513c76e33b6SJonathan Mini 	PROC_UNLOCK(p);
15149dde3bc9SDavid Xu 	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
15159dde3bc9SDavid Xu 	if (error) {
151658a3c273SJeff Roberson 		PROC_LOCK(p);
151758a3c273SJeff Roberson 		sigexit(td, SIGILL);
151858a3c273SJeff Roberson 	}
15199dde3bc9SDavid Xu 	PROC_LOCK(p);
15209dde3bc9SDavid Xu 	SIGADDSET(td->td_sigmask, sig);
15219dde3bc9SDavid Xu 	mtx_lock(&ps->ps_mtx);
1522c76e33b6SJonathan Mini }
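/*
 * Illustrative sketch (not part of this file): userland's view of the
 * siginfo that thread_signal_add() copies out to tm_syncsig.  The UTS
 * can forward the trap signal to the thread on its next upcall; the
 * zero-when-clear convention assumed here is part of the example, not
 * a documented guarantee.
 */
#if 0
#include <sys/kse.h>
#include <signal.h>

static int
uts_pending_trap_signal(const struct kse_thr_mailbox *tm)
{
	/* si_signo is nonzero once the kernel has filled tm_syncsig in. */
	return (tm->tm_syncsig.si_signo);
}
#endif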
1523c76e33b6SJonathan Mini 
15246ce75196SDavid Xu void
15256ce75196SDavid Xu thread_switchout(struct thread *td)
15266ce75196SDavid Xu {
15276ce75196SDavid Xu 	struct kse_upcall *ku;
1528ab78d4d6SDavid Xu 	struct thread *td2;
15296ce75196SDavid Xu 
15306ce75196SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
15316ce75196SDavid Xu 
15326ce75196SDavid Xu 	/*
15336ce75196SDavid Xu 	 * If the outgoing thread is in a threaded group and has never
15346ce75196SDavid Xu 	 * scheduled an upcall, decide whether this is a short
15356ce75196SDavid Xu 	 * or long term event and thus whether or not to schedule
15366ce75196SDavid Xu 	 * an upcall.
15376ce75196SDavid Xu 	 * If it is a short term event, just suspend it in
15386ce75196SDavid Xu 	 * a way that takes its KSE with it.
15396ce75196SDavid Xu 	 * Select the events for which we want to schedule upcalls.
15406ce75196SDavid Xu 	 * For now it's just sleep.
15416ce75196SDavid Xu 	 * XXXKSE eventually almost any inhibition could do.
15426ce75196SDavid Xu 	 */
15436ce75196SDavid Xu 	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
15446ce75196SDavid Xu 		/*
15456ce75196SDavid Xu 		 * Release ownership of upcall, and schedule an upcall
15466ce75196SDavid Xu 		 * thread, this new upcall thread becomes the owner of
15476ce75196SDavid Xu 		 * the upcall structure.
15486ce75196SDavid Xu 		 */
15496ce75196SDavid Xu 		ku = td->td_upcall;
15506ce75196SDavid Xu 		ku->ku_owner = NULL;
15516ce75196SDavid Xu 		td->td_upcall = NULL;
15526ce75196SDavid Xu 		td->td_flags &= ~TDF_CAN_UNBIND;
1553ab78d4d6SDavid Xu 		td2 = thread_schedule_upcall(td, ku);
1554ab78d4d6SDavid Xu 		setrunqueue(td2);
15556ce75196SDavid Xu 	}
15566ce75196SDavid Xu }
15576ce75196SDavid Xu 
1558c76e33b6SJonathan Mini /*
15595215b187SJeff Roberson  * Setup done on the thread when it enters the kernel.
15601434d3feSJulian Elischer  * XXXKSE Presently only for syscalls but eventually all kernel entries.
15611434d3feSJulian Elischer  */
15621434d3feSJulian Elischer void
15631434d3feSJulian Elischer thread_user_enter(struct proc *p, struct thread *td)
15641434d3feSJulian Elischer {
15655215b187SJeff Roberson 	struct ksegrp *kg;
15665215b187SJeff Roberson 	struct kse_upcall *ku;
15671ecb38a3SDavid Xu 	struct kse_thr_mailbox *tmbx;
15681434d3feSJulian Elischer 
15695215b187SJeff Roberson 	kg = td->td_ksegrp;
15701ecb38a3SDavid Xu 
15711434d3feSJulian Elischer 	/*
15721434d3feSJulian Elischer 	 * First check whether we should just abort,
15731434d3feSJulian Elischer 	 * but check whether we are the single thread first!
15741434d3feSJulian Elischer 	 */
1575cd4f6ebbSDavid Xu 	if (p->p_flag & P_SINGLE_EXIT) {
15761434d3feSJulian Elischer 		PROC_LOCK(p);
15771434d3feSJulian Elischer 		mtx_lock_spin(&sched_lock);
1578e574e444SDavid Xu 		thread_stopped(p);
15791434d3feSJulian Elischer 		thread_exit();
15801434d3feSJulian Elischer 		/* NOTREACHED */
15811434d3feSJulian Elischer 	}
15821434d3feSJulian Elischer 
15831434d3feSJulian Elischer 	/*
15841434d3feSJulian Elischer 	 * If we are doing a syscall in a KSE environment,
15851434d3feSJulian Elischer 	 * note where our mailbox is. There is always the
158693a7aa79SJulian Elischer 	 * possibility that we could do this lazily (in kse_reassign()),
15871434d3feSJulian Elischer 	 * but for now do it every time.
15881434d3feSJulian Elischer 	 */
15895215b187SJeff Roberson 	kg = td->td_ksegrp;
1590cd4f6ebbSDavid Xu 	if (td->td_flags & TDF_SA) {
15915215b187SJeff Roberson 		ku = td->td_upcall;
15925215b187SJeff Roberson 		KASSERT(ku, ("%s: no upcall owned", __func__));
15935215b187SJeff Roberson 		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
15941ecb38a3SDavid Xu 		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
15954e4422d4SMarcel Moolenaar 		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
15961ecb38a3SDavid Xu 		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
15971ecb38a3SDavid Xu 		if ((tmbx == NULL) || (tmbx == (void *)-1)) {
15985215b187SJeff Roberson 			td->td_mailbox = NULL;
15998798d4f9SDavid Xu 		} else {
16001ecb38a3SDavid Xu 			td->td_mailbox = tmbx;
16011434d3feSJulian Elischer 			if (td->td_standin == NULL)
16025215b187SJeff Roberson 				thread_alloc_spare(td, NULL);
16038798d4f9SDavid Xu 			mtx_lock_spin(&sched_lock);
16041ecb38a3SDavid Xu 			if (ku->ku_mflags & KMF_NOUPCALL)
16051ecb38a3SDavid Xu 				td->td_flags &= ~TDF_CAN_UNBIND;
16061ecb38a3SDavid Xu 			else
160793a7aa79SJulian Elischer 				td->td_flags |= TDF_CAN_UNBIND;
16088798d4f9SDavid Xu 			mtx_unlock_spin(&sched_lock);
16095215b187SJeff Roberson 		}
16101434d3feSJulian Elischer 	}
16111434d3feSJulian Elischer }
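/*
 * Illustrative sketch (not part of this file): the userland half of the
 * km_curthread handshake that thread_user_enter() fuword()s on kernel
 * entry.  Before running a user thread the UTS publishes that thread's
 * mailbox; NULL (or -1) means the UTS itself is running critically.
 * uts_load_context() is a hypothetical stand-in for the real context
 * switch mechanism.
 */
#if 0
#include <sys/kse.h>
#include <sys/ucontext.h>

extern void uts_load_context(ucontext_t *);	/* hypothetical */

static void
uts_run_thread(struct kse_mailbox *km, struct kse_thr_mailbox *tm)
{
	km->km_curthread = tm;		/* seen by the kernel on next entry */
	uts_load_context(&tm->tm_context);
}
#endif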
16121434d3feSJulian Elischer 
16131434d3feSJulian Elischer /*
1614c76e33b6SJonathan Mini  * The extra work we go through if we are a threaded process when we
1615c76e33b6SJonathan Mini  * return to userland.
1616c76e33b6SJonathan Mini  *
1617c76e33b6SJonathan Mini  * If we are a KSE process and returning to user mode, check for
1618c76e33b6SJonathan Mini  * extra work to do before we return (e.g. for more syscalls
1619c76e33b6SJonathan Mini  * to complete first).  If we were in a critical section, we should
1620c76e33b6SJonathan Mini  * just return to let it finish. Same if we were in the UTS (in
1621c76e33b6SJonathan Mini  * which case the mailbox's context's busy indicator will be set).
1622c76e33b6SJonathan Mini  * The only traps we support will have set the mailbox.
1623c76e33b6SJonathan Mini  * We will clear it here.
162444990b8cSJulian Elischer  */
1625c76e33b6SJonathan Mini int
1626253fdd5bSJulian Elischer thread_userret(struct thread *td, struct trapframe *frame)
1627c76e33b6SJonathan Mini {
16281ecb38a3SDavid Xu 	int error = 0, upcalls, uts_crit;
16295215b187SJeff Roberson 	struct kse_upcall *ku;
16300252d203SDavid Xu 	struct ksegrp *kg, *kg2;
163148bfcdddSJulian Elischer 	struct proc *p;
1632bfd83250SDavid Xu 	struct timespec ts;
1633c76e33b6SJonathan Mini 
16346f8132a8SJulian Elischer 	p = td->td_proc;
16355215b187SJeff Roberson 	kg = td->td_ksegrp;
1636cd4f6ebbSDavid Xu 	ku = td->td_upcall;
163793a7aa79SJulian Elischer 
1638cd4f6ebbSDavid Xu 	/* Nothing to do with bound thread */
1639cd4f6ebbSDavid Xu 	if (!(td->td_flags & TDF_SA))
16405215b187SJeff Roberson 		return (0);
16415215b187SJeff Roberson 
16425215b187SJeff Roberson 	/*
16435215b187SJeff Roberson 	 * A statclock interrupt hit in userland and we are
16445215b187SJeff Roberson 	 * returning from the interrupt; charge the thread's
16455215b187SJeff Roberson 	 * userland time for the UTS.
16465215b187SJeff Roberson 	 */
16475215b187SJeff Roberson 	if (td->td_flags & TDF_USTATCLOCK) {
16484b4866edSDavid Xu 		thread_update_usr_ticks(td, 1);
164993a7aa79SJulian Elischer 		mtx_lock_spin(&sched_lock);
16505215b187SJeff Roberson 		td->td_flags &= ~TDF_USTATCLOCK;
16510dbb100bSDavid Xu 		mtx_unlock_spin(&sched_lock);
16524b4866edSDavid Xu 		if (kg->kg_completed ||
16534b4866edSDavid Xu 		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
16544b4866edSDavid Xu 			thread_user_enter(p, td);
16555215b187SJeff Roberson 	}
16565215b187SJeff Roberson 
16571ecb38a3SDavid Xu 	uts_crit = (td->td_mailbox == NULL);
16585215b187SJeff Roberson 	/*
16595215b187SJeff Roberson 	 * Optimisation:
16605215b187SJeff Roberson 	 * This thread has not started any upcall.
16615215b187SJeff Roberson 	 * If there is no work to report other than ourselves,
16625215b187SJeff Roberson 	 * then it can return directly to userland.
16635215b187SJeff Roberson 	 */
16645215b187SJeff Roberson 	if (TD_CAN_UNBIND(td)) {
16655215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
16665215b187SJeff Roberson 		td->td_flags &= ~TDF_CAN_UNBIND;
16674093529dSJeff Roberson 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
166821e0492aSDavid Xu 		    (kg->kg_completed == NULL) &&
16696ce75196SDavid Xu 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
167095bee4c3SDavid Xu 		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
16714093529dSJeff Roberson 			mtx_unlock_spin(&sched_lock);
16724b4866edSDavid Xu 			thread_update_usr_ticks(td, 0);
16739a4b78c9SDavid Xu 			nanotime(&ts);
16749a4b78c9SDavid Xu 			error = copyout(&ts,
16759a4b78c9SDavid Xu 				(caddr_t)&ku->ku_mailbox->km_timeofday,
16769a4b78c9SDavid Xu 				sizeof(ts));
167721e0492aSDavid Xu 			td->td_mailbox = NULL;
16781ecb38a3SDavid Xu 			ku->ku_mflags = 0;
16799a4b78c9SDavid Xu 			if (error)
16809a4b78c9SDavid Xu 				goto out;
168193a7aa79SJulian Elischer 			return (0);
168293a7aa79SJulian Elischer 		}
16834093529dSJeff Roberson 		mtx_unlock_spin(&sched_lock);
1684dd7da9aaSDavid Xu 		thread_export_context(td, 0);
168593a7aa79SJulian Elischer 		/*
16865215b187SJeff Roberson 		 * There is something to report, and we own an upcall
16875215b187SJeff Roberson 		 * structure, so we can go to userland.
16885215b187SJeff Roberson 		 * Turn ourselves into an upcall thread.
168993a7aa79SJulian Elischer 		 */
16901d5a24beSDavid Xu 		td->td_pflags |= TDP_UPCALLING;
16911ecb38a3SDavid Xu 	} else if (td->td_mailbox && (ku == NULL)) {
1692dd7da9aaSDavid Xu 		thread_export_context(td, 1);
1693e574e444SDavid Xu 		PROC_LOCK(p);
16946f8132a8SJulian Elischer 		/*
16955215b187SJeff Roberson 		 * There are upcall threads waiting for
16965215b187SJeff Roberson 		 * work to do, wake one of them up.
16975215b187SJeff Roberson 		 * XXXKSE Maybe wake all of them up.
16986f8132a8SJulian Elischer 		 */
1699dd7da9aaSDavid Xu 		if (kg->kg_upsleeps)
17005215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
1701e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1702e574e444SDavid Xu 		thread_stopped(p);
170393a7aa79SJulian Elischer 		thread_exit();
17045215b187SJeff Roberson 		/* NOTREACHED */
170548bfcdddSJulian Elischer 	}
170693a7aa79SJulian Elischer 
1707cd4f6ebbSDavid Xu 	KASSERT(ku != NULL, ("upcall is NULL\n"));
1708a87891eeSDavid Xu 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1709a87891eeSDavid Xu 
1710a87891eeSDavid Xu 	if (p->p_numthreads > max_threads_per_proc) {
1711a87891eeSDavid Xu 		max_threads_hits++;
1712a87891eeSDavid Xu 		PROC_LOCK(p);
1713112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
17147677ce18SDavid Xu 		p->p_maxthrwaits++;
1715a87891eeSDavid Xu 		while (p->p_numthreads > max_threads_per_proc) {
1716a87891eeSDavid Xu 			upcalls = 0;
1717a87891eeSDavid Xu 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1718a87891eeSDavid Xu 				if (kg2->kg_numupcalls == 0)
1719a87891eeSDavid Xu 					upcalls++;
1720a87891eeSDavid Xu 				else
1721a87891eeSDavid Xu 					upcalls += kg2->kg_numupcalls;
1722a87891eeSDavid Xu 			}
1723a87891eeSDavid Xu 			if (upcalls >= max_threads_per_proc)
1724a87891eeSDavid Xu 				break;
17255073e68fSDavid Xu 			mtx_unlock_spin(&sched_lock);
172636407becSDavid Xu 			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
17277677ce18SDavid Xu 			    "maxthreads", NULL)) {
17287677ce18SDavid Xu 				mtx_lock_spin(&sched_lock);
172936407becSDavid Xu 				break;
17307677ce18SDavid Xu 			} else {
1731112afcb2SJohn Baldwin 				mtx_lock_spin(&sched_lock);
1732a87891eeSDavid Xu 			}
17337677ce18SDavid Xu 		}
17347677ce18SDavid Xu 		p->p_maxthrwaits--;
1735112afcb2SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
1736a87891eeSDavid Xu 		PROC_UNLOCK(p);
1737a87891eeSDavid Xu 	}
1738a87891eeSDavid Xu 
17391d5a24beSDavid Xu 	if (td->td_pflags & TDP_UPCALLING) {
17401ecb38a3SDavid Xu 		uts_crit = 0;
17416ce75196SDavid Xu 		kg->kg_nextupcall = ticks+kg->kg_upquantum;
174248bfcdddSJulian Elischer 		/*
174344990b8cSJulian Elischer 		 * There is no more work to do and we are going to ride
17445215b187SJeff Roberson 		 * this thread up to userland as an upcall.
174548bfcdddSJulian Elischer 		 * Do the last parts of the setup needed for the upcall.
174644990b8cSJulian Elischer 		 */
1747c76e33b6SJonathan Mini 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1748ed32df81SJulian Elischer 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1749c76e33b6SJonathan Mini 
17501d5a24beSDavid Xu 		td->td_pflags &= ~TDP_UPCALLING;
1751cd4f6ebbSDavid Xu 		if (ku->ku_flags & KUF_DOUPCALL) {
17525215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
17535215b187SJeff Roberson 			ku->ku_flags &= ~KUF_DOUPCALL;
17545215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
1755cd4f6ebbSDavid Xu 		}
1756c76e33b6SJonathan Mini 		/*
17571ecb38a3SDavid Xu 		 * Set user context to the UTS
17581ecb38a3SDavid Xu 		 */
17591ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
17601ecb38a3SDavid Xu 			cpu_set_upcall_kse(td, ku);
17611ecb38a3SDavid Xu 			error = suword(&ku->ku_mailbox->km_curthread, 0);
17621ecb38a3SDavid Xu 			if (error)
17631ecb38a3SDavid Xu 				goto out;
17641ecb38a3SDavid Xu 		}
17651ecb38a3SDavid Xu 
17661ecb38a3SDavid Xu 		/*
176793a7aa79SJulian Elischer 		 * Unhook the list of completed threads.
176893a7aa79SJulian Elischer 		 * Anything that completes after this gets to
176993a7aa79SJulian Elischer 		 * come in next time.
177093a7aa79SJulian Elischer 		 * Put the list of completed thread mailboxes on
177193a7aa79SJulian Elischer 		 * this upcall's mailbox.
1772c76e33b6SJonathan Mini 		 */
17731ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
17741ecb38a3SDavid Xu 		    (error = thread_link_mboxes(kg, ku)) != 0)
17750252d203SDavid Xu 			goto out;
17761ecb38a3SDavid Xu 	}
17771ecb38a3SDavid Xu 	if (!uts_crit) {
1778bfd83250SDavid Xu 		nanotime(&ts);
17791ecb38a3SDavid Xu 		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1780bfd83250SDavid Xu 	}
17810252d203SDavid Xu 
17820252d203SDavid Xu out:
17830252d203SDavid Xu 	if (error) {
17843d0586d4SJulian Elischer 		/*
1785fc8cdd87SDavid Xu 		 * Things are going to be so screwed we should just kill
1786fc8cdd87SDavid Xu 		 * the process.
17873d0586d4SJulian Elischer 		 * How do we do that?
17883d0586d4SJulian Elischer 		 */
178948bfcdddSJulian Elischer 		PROC_LOCK(td->td_proc);
179048bfcdddSJulian Elischer 		psignal(td->td_proc, SIGSEGV);
179148bfcdddSJulian Elischer 		PROC_UNLOCK(td->td_proc);
17920252d203SDavid Xu 	} else {
17930252d203SDavid Xu 		/*
17940252d203SDavid Xu 		 * Optimisation:
17950252d203SDavid Xu 		 * Ensure that we have a spare thread available,
17960252d203SDavid Xu 		 * for when we re-enter the kernel.
17970252d203SDavid Xu 		 */
17980252d203SDavid Xu 		if (td->td_standin == NULL)
17990252d203SDavid Xu 			thread_alloc_spare(td, NULL);
18000252d203SDavid Xu 	}
18010252d203SDavid Xu 
18021ecb38a3SDavid Xu 	ku->ku_mflags = 0;
18030252d203SDavid Xu 	/*
18040252d203SDavid Xu 	 * Clear thread mailbox first, then clear system tick count.
18050252d203SDavid Xu 	 * The order is important because thread_statclock() uses
18060252d203SDavid Xu 	 * the mailbox pointer to see whether it is a userland
18070252d203SDavid Xu 	 * thread or a UTS kernel thread.
18080252d203SDavid Xu 	 */
180993a7aa79SJulian Elischer 	td->td_mailbox = NULL;
18105215b187SJeff Roberson 	td->td_usticks = 0;
181148bfcdddSJulian Elischer 	return (error);	/* go sync */
181244990b8cSJulian Elischer }
181344990b8cSJulian Elischer 
181444990b8cSJulian Elischer /*
181544990b8cSJulian Elischer  * Enforce single-threading.
181644990b8cSJulian Elischer  *
181744990b8cSJulian Elischer  * Returns 1 if the caller must abort (another thread is waiting to
181844990b8cSJulian Elischer  * exit the process or similar). Process is locked!
181944990b8cSJulian Elischer  * Returns 0 when you are successfully the only thread running.
182044990b8cSJulian Elischer  * A process has successfully single threaded in the suspend mode when
182144990b8cSJulian Elischer  * there are no threads in user mode. Threads in the kernel must be
182244990b8cSJulian Elischer  * allowed to continue until they get to the user boundary. They may even
182344990b8cSJulian Elischer  * copy out their return values and data before suspending. They may however
182444990b8cSJulian Elischer  * be accelerated in reaching the user boundary as we will wake up
182544990b8cSJulian Elischer  * any sleeping threads that are interruptible (PCATCH).
182644990b8cSJulian Elischer  */
182744990b8cSJulian Elischer int
182844990b8cSJulian Elischer thread_single(int force_exit)
182944990b8cSJulian Elischer {
183044990b8cSJulian Elischer 	struct thread *td;
183144990b8cSJulian Elischer 	struct thread *td2;
183244990b8cSJulian Elischer 	struct proc *p;
183344990b8cSJulian Elischer 
183444990b8cSJulian Elischer 	td = curthread;
183544990b8cSJulian Elischer 	p = td->td_proc;
1836696058c3SJulian Elischer 	mtx_assert(&Giant, MA_OWNED);
183744990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
183844990b8cSJulian Elischer 	KASSERT((td != NULL), ("curthread is NULL"));
183944990b8cSJulian Elischer 
18400e2a4d3aSDavid Xu 	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
184144990b8cSJulian Elischer 		return (0);
184244990b8cSJulian Elischer 
1843e3b9bf71SJulian Elischer 	/* Is someone already single threading? */
1844e3b9bf71SJulian Elischer 	if (p->p_singlethread)
184544990b8cSJulian Elischer 		return (1);
184644990b8cSJulian Elischer 
184793a7aa79SJulian Elischer 	if (force_exit == SINGLE_EXIT) {
184844990b8cSJulian Elischer 		p->p_flag |= P_SINGLE_EXIT;
184993a7aa79SJulian Elischer 	} else
185044990b8cSJulian Elischer 		p->p_flag &= ~P_SINGLE_EXIT;
18511279572aSDavid Xu 	p->p_flag |= P_STOPPED_SINGLE;
185271fad9fdSJulian Elischer 	mtx_lock_spin(&sched_lock);
1853112afcb2SJohn Baldwin 	p->p_singlethread = td;
1854112afcb2SJohn Baldwin 	while ((p->p_numthreads - p->p_suspcount) != 1) {
185544990b8cSJulian Elischer 		FOREACH_THREAD_IN_PROC(p, td2) {
185644990b8cSJulian Elischer 			if (td2 == td)
185744990b8cSJulian Elischer 				continue;
1858588257e8SDavid Xu 			td2->td_flags |= TDF_ASTPENDING;
185971fad9fdSJulian Elischer 			if (TD_IS_INHIBITED(td2)) {
18601279572aSDavid Xu 				if (force_exit == SINGLE_EXIT) {
18619d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2)) {
186271fad9fdSJulian Elischer 						thread_unsuspend_one(td2);
186371fad9fdSJulian Elischer 					}
186433862f40SDavid Xu 					if (TD_ON_SLEEPQ(td2) &&
186533862f40SDavid Xu 					    (td2->td_flags & TDF_SINTR)) {
1866e3b9bf71SJulian Elischer 						if (td2->td_flags & TDF_CVWAITQ)
186733862f40SDavid Xu 							cv_abort(td2);
1868e3b9bf71SJulian Elischer 						else
186933862f40SDavid Xu 							abortsleep(td2);
187071fad9fdSJulian Elischer 					}
18719d102777SJulian Elischer 				} else {
18729d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2))
18739d102777SJulian Elischer 						continue;
18745215b187SJeff Roberson 					/*
18755215b187SJeff Roberson 					 * Maybe other inhibited states too?
18765215b187SJeff Roberson 					 * XXXKSE Is it totally safe to
18775215b187SJeff Roberson 					 * suspend a non-interruptible thread?
18785215b187SJeff Roberson 					 */
187993a7aa79SJulian Elischer 					if (td2->td_inhibitors &
18805215b187SJeff Roberson 					    (TDI_SLEEPING | TDI_SWAPPED))
18819d102777SJulian Elischer 						thread_suspend_one(td2);
188244990b8cSJulian Elischer 				}
188344990b8cSJulian Elischer 			}
18849d102777SJulian Elischer 		}
18859d102777SJulian Elischer 		/*
18869d102777SJulian Elischer 		 * Maybe we suspended some threads.. was it enough?
18879d102777SJulian Elischer 		 */
1888112afcb2SJohn Baldwin 		if ((p->p_numthreads - p->p_suspcount) == 1)
18899d102777SJulian Elischer 			break;
18909d102777SJulian Elischer 
189144990b8cSJulian Elischer 		/*
189244990b8cSJulian Elischer 		 * Wake us up when everyone else has suspended.
1893e3b9bf71SJulian Elischer 		 * In the meantime we suspend as well.
189444990b8cSJulian Elischer 		 */
189571fad9fdSJulian Elischer 		thread_suspend_one(td);
189611b20c68SDavid Xu 		DROP_GIANT();
189744990b8cSJulian Elischer 		PROC_UNLOCK(p);
1898696058c3SJulian Elischer 		p->p_stats->p_ru.ru_nvcsw++;
189944990b8cSJulian Elischer 		mi_switch();
190044990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
190111b20c68SDavid Xu 		PICKUP_GIANT();
190244990b8cSJulian Elischer 		PROC_LOCK(p);
1903112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
190444990b8cSJulian Elischer 	}
19055215b187SJeff Roberson 	if (force_exit == SINGLE_EXIT) {
1906112afcb2SJohn Baldwin 		if (td->td_upcall)
19075215b187SJeff Roberson 			upcall_remove(td);
19085c8329edSJulian Elischer 		kse_purge(p, td);
19095215b187SJeff Roberson 	}
1910112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
191144990b8cSJulian Elischer 	return (0);
191244990b8cSJulian Elischer }
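/*
 * Illustrative sketch (not part of this file): a typical caller of
 * thread_single() in the non-exit mode, quiescing the process around
 * some critical operation.  SINGLE_NO_EXIT is assumed to be the
 * companion of the SINGLE_EXIT mode used above, and the ERESTART
 * back-out is an illustrative choice.
 */
#if 0
	PROC_LOCK(p);
	if (thread_single(SINGLE_NO_EXIT)) {
		/* Another thread is already single-threading; back out. */
		PROC_UNLOCK(p);
		return (ERESTART);
	}
	/* ... work that requires all other threads suspended ... */
	thread_single_end();
	PROC_UNLOCK(p);
#endif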
191344990b8cSJulian Elischer 
191444990b8cSJulian Elischer /*
191544990b8cSJulian Elischer  * Called in from locations that can safely check to see
191644990b8cSJulian Elischer  * whether we have to suspend or at least throttle for a
191744990b8cSJulian Elischer  * single-thread event (e.g. fork).
191844990b8cSJulian Elischer  *
191944990b8cSJulian Elischer  * Such locations include userret().
192044990b8cSJulian Elischer  * If the "return_instead" argument is nonzero, the thread must be able to
192144990b8cSJulian Elischer  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
192244990b8cSJulian Elischer  *
192344990b8cSJulian Elischer  * The 'return_instead' argument tells the function if it may do a
192444990b8cSJulian Elischer  * thread_exit() or suspend, or whether the caller must abort and back
192544990b8cSJulian Elischer  * out instead.
192644990b8cSJulian Elischer  *
192744990b8cSJulian Elischer  * If the thread that set the single_threading request has set the
192844990b8cSJulian Elischer  * P_SINGLE_EXIT bit in the process flags then this call will never return
192944990b8cSJulian Elischer  * if 'return_instead' is false, but will exit.
193044990b8cSJulian Elischer  *
193144990b8cSJulian Elischer  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
193244990b8cSJulian Elischer  *---------------+--------------------+---------------------
193344990b8cSJulian Elischer  *       0       | returns 0          |   returns 0 or 1
193444990b8cSJulian Elischer  *               | when ST ends       |   immediately
193544990b8cSJulian Elischer  *---------------+--------------------+---------------------
193644990b8cSJulian Elischer  *       1       | thread exits       |   returns 1
193744990b8cSJulian Elischer  *               |                    |  immediately
193844990b8cSJulian Elischer  * 0 = thread_exit() or suspension ok,
193944990b8cSJulian Elischer  * other = return error instead of stopping the thread.
194044990b8cSJulian Elischer  *
194144990b8cSJulian Elischer  * While a full suspension is under effect, even a single threading
194244990b8cSJulian Elischer  * thread would be suspended if it made this call (but it shouldn't).
194344990b8cSJulian Elischer  * This call should only be made from places where
194444990b8cSJulian Elischer  * thread_exit() would be safe as that may be the outcome unless
194544990b8cSJulian Elischer  * return_instead is set.
194644990b8cSJulian Elischer  */
194744990b8cSJulian Elischer int
194844990b8cSJulian Elischer thread_suspend_check(int return_instead)
194944990b8cSJulian Elischer {
1950ecafb24bSJuli Mallett 	struct thread *td;
1951ecafb24bSJuli Mallett 	struct proc *p;
195244990b8cSJulian Elischer 
195344990b8cSJulian Elischer 	td = curthread;
195444990b8cSJulian Elischer 	p = td->td_proc;
195544990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
195644990b8cSJulian Elischer 	while (P_SHOULDSTOP(p)) {
19571279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
195844990b8cSJulian Elischer 			KASSERT(p->p_singlethread != NULL,
195944990b8cSJulian Elischer 			    ("singlethread not set"));
196044990b8cSJulian Elischer 			/*
1961e3b9bf71SJulian Elischer 			 * The only suspension in action is a
1962e3b9bf71SJulian Elischer 			 * single-threading. The single threader need not stop.
1963b6d5995eSJulian Elischer 			 * XXX Should be safe to access unlocked
1964b6d5995eSJulian Elischer 			 * as it can only be set to be true by us.
196544990b8cSJulian Elischer 			 */
1966e3b9bf71SJulian Elischer 			if (p->p_singlethread == td)
196744990b8cSJulian Elischer 				return (0);	/* Exempt from stopping. */
196844990b8cSJulian Elischer 		}
1969e3b9bf71SJulian Elischer 		if (return_instead)
197044990b8cSJulian Elischer 			return (1);
197144990b8cSJulian Elischer 
1972e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1973e574e444SDavid Xu 		thread_stopped(p);
197444990b8cSJulian Elischer 		/*
197544990b8cSJulian Elischer 		 * If the process is waiting for us to exit,
197644990b8cSJulian Elischer 		 * this thread should just suicide.
19771279572aSDavid Xu 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
197844990b8cSJulian Elischer 		 */
197944990b8cSJulian Elischer 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
198044990b8cSJulian Elischer 			while (mtx_owned(&Giant))
198144990b8cSJulian Elischer 				mtx_unlock(&Giant);
19820e2a4d3aSDavid Xu 			if (p->p_flag & P_SA)
198344990b8cSJulian Elischer 				thread_exit();
19842c10d16aSJeff Roberson 			else
19852c10d16aSJeff Roberson 				thr_exit1();
198644990b8cSJulian Elischer 		}
198744990b8cSJulian Elischer 
198844990b8cSJulian Elischer 		/*
198944990b8cSJulian Elischer 		 * When a thread suspends, it just
199044990b8cSJulian Elischer 		 * moves to the process's suspend queue
199144990b8cSJulian Elischer 		 * and stays there.
199244990b8cSJulian Elischer 		 */
199371fad9fdSJulian Elischer 		thread_suspend_one(td);
19941279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1995cf19bf91SJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
199671fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
1997cf19bf91SJulian Elischer 			}
1998cf19bf91SJulian Elischer 		}
19991ecc6456SDavid Xu 		DROP_GIANT();
2000a6f37ac9SJohn Baldwin 		PROC_UNLOCK(p);
200120568366SJulian Elischer 		p->p_stats->p_ru.ru_nivcsw++;
200244990b8cSJulian Elischer 		mi_switch();
200344990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
20041ecc6456SDavid Xu 		PICKUP_GIANT();
200544990b8cSJulian Elischer 		PROC_LOCK(p);
200644990b8cSJulian Elischer 	}
200744990b8cSJulian Elischer 	return (0);
200844990b8cSJulian Elischer }
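/*
 * Illustrative sketch (not part of this file): the two calling styles
 * from the table above.  With return_instead == 0 the thread may stop
 * or exit inside the call; with return_instead != 0 the caller must be
 * prepared to abort and unwind.  The EINTR back-out is an illustrative
 * error choice.
 */
#if 0
	/* At the user boundary: stopping here is always safe. */
	PROC_LOCK(p);
	thread_suspend_check(0);
	PROC_UNLOCK(p);

	/* Deep in a syscall: back out rather than stopping. */
	PROC_LOCK(p);
	if (thread_suspend_check(1)) {
		PROC_UNLOCK(p);
		return (EINTR);
	}
	PROC_UNLOCK(p);
#endif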
200944990b8cSJulian Elischer 
201035c32a76SDavid Xu void
201135c32a76SDavid Xu thread_suspend_one(struct thread *td)
201235c32a76SDavid Xu {
201335c32a76SDavid Xu 	struct proc *p = td->td_proc;
201435c32a76SDavid Xu 
201535c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2016112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
2017e574e444SDavid Xu 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
201835c32a76SDavid Xu 	p->p_suspcount++;
201971fad9fdSJulian Elischer 	TD_SET_SUSPENDED(td);
202035c32a76SDavid Xu 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
202171fad9fdSJulian Elischer 	/*
202271fad9fdSJulian Elischer 	 * Hack: If we are suspending but are on the sleep queue
202371fad9fdSJulian Elischer 	 * then we are in msleep or the cv equivalent. We
202471fad9fdSJulian Elischer 	 * want to look like we have two inhibitors.
20259d102777SJulian Elischer 	 * It may already be set; that doesn't matter.
202671fad9fdSJulian Elischer 	 */
202771fad9fdSJulian Elischer 	if (TD_ON_SLEEPQ(td))
202871fad9fdSJulian Elischer 		TD_SET_SLEEPING(td);
202935c32a76SDavid Xu }
203035c32a76SDavid Xu 
203135c32a76SDavid Xu void
203235c32a76SDavid Xu thread_unsuspend_one(struct thread *td)
203335c32a76SDavid Xu {
203435c32a76SDavid Xu 	struct proc *p = td->td_proc;
203535c32a76SDavid Xu 
203635c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2037112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
203835c32a76SDavid Xu 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
203971fad9fdSJulian Elischer 	TD_CLR_SUSPENDED(td);
204035c32a76SDavid Xu 	p->p_suspcount--;
204171fad9fdSJulian Elischer 	setrunnable(td);
204235c32a76SDavid Xu }
204335c32a76SDavid Xu 
204444990b8cSJulian Elischer /*
204544990b8cSJulian Elischer  * Allow all threads blocked by single threading to continue running.
204644990b8cSJulian Elischer  */
204744990b8cSJulian Elischer void
204844990b8cSJulian Elischer thread_unsuspend(struct proc *p)
204944990b8cSJulian Elischer {
205044990b8cSJulian Elischer 	struct thread *td;
205144990b8cSJulian Elischer 
2052b6d5995eSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
205344990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
205444990b8cSJulian Elischer 	if (!P_SHOULDSTOP(p)) {
205544990b8cSJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
205635c32a76SDavid Xu 			thread_unsuspend_one(td);
205744990b8cSJulian Elischer 		}
20581279572aSDavid Xu 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
205944990b8cSJulian Elischer 	    (p->p_numthreads == p->p_suspcount)) {
206044990b8cSJulian Elischer 		/*
206144990b8cSJulian Elischer 		 * Stopping everything also did the job for the single
206244990b8cSJulian Elischer 		 * threading request. Now we've downgraded to single-threaded,
206344990b8cSJulian Elischer 		 * let it continue.
206444990b8cSJulian Elischer 		 */
206535c32a76SDavid Xu 		thread_unsuspend_one(p->p_singlethread);
206644990b8cSJulian Elischer 	}
206744990b8cSJulian Elischer }
206844990b8cSJulian Elischer 
206944990b8cSJulian Elischer void
207044990b8cSJulian Elischer thread_single_end(void)
207144990b8cSJulian Elischer {
207244990b8cSJulian Elischer 	struct thread *td;
207344990b8cSJulian Elischer 	struct proc *p;
207444990b8cSJulian Elischer 
207544990b8cSJulian Elischer 	td = curthread;
207644990b8cSJulian Elischer 	p = td->td_proc;
207744990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
20781279572aSDavid Xu 	p->p_flag &= ~P_STOPPED_SINGLE;
2079112afcb2SJohn Baldwin 	mtx_lock_spin(&sched_lock);
208044990b8cSJulian Elischer 	p->p_singlethread = NULL;
208149539972SJulian Elischer 	/*
208249539972SJulian Elischer 	 * If there are other threads they may now run,
208349539972SJulian Elischer 	 * unless of course there is a blanket 'stop order'
208449539972SJulian Elischer 	 * on the process. The single threader must be allowed
208549539972SJulian Elischer 	 * to continue, however, as this is a bad place to stop.
208649539972SJulian Elischer 	 */
208749539972SJulian Elischer 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
208849539972SJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
208971fad9fdSJulian Elischer 			thread_unsuspend_one(td);
209044990b8cSJulian Elischer 		}
209149539972SJulian Elischer 	}
2092112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
209349539972SJulian Elischer }
209449539972SJulian Elischer 
209544990b8cSJulian Elischer 
2096